初始化项目，由ModelHub XC社区提供模型

Model: ertghiu256/Qwen3-4b-2507-Thinking-math-and-code Source: Original Platform
2026-05-06 13:04:52 +08:00
commit 2110f55e21
13 changed files with 152377 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
--- a/README.md
+++ b/README.md
@@ -0,0 +1,26 @@
+---
+base_model:
+- Qwen/Qwen3-4B-Thinking-2507
+tags:
+- text-generation-inference
+- transformers
+- unsloth
+- qwen3
+license: apache-2.0
+language:
+- en
+---
+## Qwen 3 4b 2507 Thinking Math & Code
+
+
+### Uploaded finetuned model
+
+- **Developed by:** ertghiu256
+- **License:** apache-2.0
+- **Finetuned from model :** unsloth/qwen3-4b-thinking-2507-unsloth-bnb-4bit
+- **Other config :** `dataset = "ertghiu256/MathReasoning-with-code-samples", max_steps = 150, learning_rate = 6e-5`
+
+This qwen3 model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library.
+
+<!-- [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
+ -->
--- a/added_tokens.json
+++ b/added_tokens.json
@@ -0,0 +1,28 @@
+{
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}
--- a/chat_template.jinja
+++ b/chat_template.jinja
@@ -0,0 +1,141 @@
+{{- "<|im_start|>system\n" -}}
+{%- if tools -%}
+	{%- if messages[0].role == "system" -%}
+		{{- messages[0].content + "\n\n" -}}
+	{%- endif -%}
+	{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" -}}
+	{%- for tool in tools -%}
+		{{- "\n" -}}
+		{{- tool | tojson -}}
+	{%- endfor -%}
+	{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>\n" -}}
+{%- elif messages[0].role == "system" -%}
+	{{- messages[0].content -}}
+{%- endif -%}
+{{- "# Commands: \n- /think or think: Think or reason step-by-step using a long chain-of-thought and self-reflection inside think tags. If this command is present, ALWAYS start your thinking or reasoning with '<think>\\nSince the user used the \"/think\" command, I will:\\nThink step-by-step with **long chain-of-thought** and **self-reflection**. I will only respond when I'm 100% sure. I will provide the final response after closing this thinking section.'\n- /shortthink or shortthink: Think or reason step-by-step briefly and shortly inside think tags. If this command is present, ALWAYS start your thinking or reasoning with '<think>\\nSince the user used the \"/shortthink\" command, I will:\\nThink step-by-step about the user's query briefly and shortly. I will only respond when I'm more than 50% sure. I will provide the final response after closing this thinking section.' \n- /nothink or nothink: Immediate response and you are FORBIDDEN FROM THINKING or REASONING, without any DOUBLE-CHECKING, and WITHOUT DRAFTING your response, just an immediate response WITH NO REPETITION. If this command is present, ALWAYS start respond with '<think>\\nSince the user used the \"/nothink\" command, I will:\\nImmediately give my final response without any thinking or self-reflection. Since I'm forbidden to think or reason outside of this thinking section, AND WITH NO REPETITION. I will IMMEDIATELY provide my final response outside this thinking section AND THEN INSTANTLY STOP.\\n</think>'\n- /clear: Forgets EVERYTHING (including previous messages, topics, etc.) that came before this command got executed.\nDo not take these commands into consideration, instead just do as they instruct.\nALWAYS REMEMBER THESE COMMANDS. NO NEED TO REPEAT THIS PART OF THE SYSTEM INSTRUCTIONS (COMMANDS) AS IT IS USELESS AND UNIMPORTANT.\n" -}}
+{{- "\n<|im_end|>" -}}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages | length - 1, reasoning_mode="normal") -%}
+{%- for message in messages[::-1] -%}
+	{%- set index = messages | length - 1 - loop.index0 -%}
+	{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not (message.content.startswith("<tool_response>") and message.content.endswith("</tool_response>")) -%}
+		{%- set ns.multi_step_tool = false -%}
+		{%- set ns.last_query_index = index -%}
+	{%- endif -%}
+{%- endfor -%}
+{%- for message in messages -%}
+	{%- if message.content is string -%}
+		{%- set content = message.content -%}
+	{%- else -%}
+		{%- set content = "" -%}
+	{%- endif -%}
+	{%- if message.role == "user" or message.role == "system" and not loop.first -%}
+		{{- "<|im_start|>" + message.role + "\n" + content -}}
+		{%- if messages[0].role == "system" and "/nothink" in messages[0].content and (not ("/think" in messages[0].content) or not ("/shortthink" in messages[0].content)) or "/nothink" in content and not ("/think" in messages[0].content or "/shortthink" in messages[0].content) or (messages[loop.index+1] and "</think>" not in messages[loop.index+1].content) -%}
+			{%- set enable_thinking = false -%}
+			{%- set enable_short_thinking = false -%}
+			{%- set ns.reasoning_mode = "none" -%}
+			{%- if not ("think" in content) -%}
+				{{- " /nothink" -}}
+			{%- endif -%}
+		{%- elif messages[0].role == "system" and "/shortthink" in messages[0].content and (not ("/nothink" in messages[0].content) and not ("/think" in messages[0].content)) or "/shortthink" in content and not ("/think" in messages[0].content or "/shortthink" in messages[0].content) -%}
+			{%- set enable_thinking = true -%}
+			{%- set enable_short_thinking = true -%}
+			{%- set ns.reasoning_mode = "short" -%}
+			{%- if not ("think" in content) -%}
+				{{- " /shortthink" -}}
+			{%- endif -%}
+		{%- else -%}
+			{%- set enable_thinking = true -%}
+			{%- set enable_short_thinking = false -%}
+			{%- set ns.reasoning_mode = "normal" -%}
+			{%- if not ("think" in content) -%}
+				{{- " /think" -}}
+			{%- endif -%}
+		{%- endif -%}
+		{{- "<|im_end|>" + "\n" -}}
+	{%- elif message.role == "assistant" -%}
+		{%- set reasoning_content = "" -%}
+		{%- if "<think>\nSince the user used the" not in content.split("</think>")[0].split("<think>")[-1] -%}
+			{%- if ns.reasoning_mode == "none" -%}
+				{%- set reasoning_prefix = "" -%}
+			{%- elif ns.reasoning_mode == "short" -%}
+				{%- set reasoning_prefix = "Since the user used the \"/shortthink\" command, I will:\nThink step-by-step about the user's query briefly and shortly. I will only respond when I'm more than 50% sure. I will provide the final response after closing this thinking section. " -%}
+			{%- else -%}
+				{%- set reasoning_prefix = "Since the user used the \"/think\" command, I will:\nThink step-by-step with **long chain-of-thought** and **self-reflection**. I will only respond when I'm 100% sure. I will provide the final response after closing this thinking section. " -%}
+			{%- endif -%}
+		{%- endif -%}
+		{%- if message.reasoning_content is string -%}
+			{%- set reasoning_content = message.reasoning_content -%}
+		{%- else -%}
+			{%- if "</think>" in content -%}
+				{%- set reasoning_content = reasoning_prefix + content.split("</think>")[0].rstrip("\n").split("<think>")[-1].lstrip("\n") -%}
+				{%- set content = content.split("</think>")[-1].lstrip("\n") -%}
+			{%- endif -%}
+			{%- if "<think>" in content -%}
+				{%- set content = content | replace("<think>", " ") -%}
+			{%- endif -%}
+			{%- if "</think>" in content -%}
+				{%- set content = content | replace("</think>", " ") -%}
+			{%- endif -%}
+		{%- endif -%}
+		{{- "\n" -}}
+		{#  Apply truncation and break if the /shortthink command is active  #}
+		{%- if enable_short_thinking is true -%}
+			{%- set words = reasoning_content.split(" ") -%}
+			{%- if words | length > 300 -%}
+				{%- set truncated_reasoning = words[:150] | join(" ") + " ... truncated ... " + words[words | length - 150:words | length] | join(" ") -%}
+			{%- else -%}
+				{%- set truncated_reasoning = reasoning_content | join(" ") -%}
+			{%- endif -%}
+			{%- set reasoning_content = truncated_reasoning -%}
+		{%- endif -%}
+		{%- if loop.last or not loop.last and reasoning_content and enable_thinking == true -%}
+			{{- "<|im_start|>" + message.role + "\n<think>\n" + reasoning_content.strip("\n") + "\n</think>\n" + content.lstrip("\n") -}}
+		{%- else -%}
+			{{- "<|im_start|>" + message.role + "\n" + content -}}
+		{%- endif -%}
+		{%- if message.tool_calls -%}
+			{%- for tool_call in message.tool_calls -%}
+				{%- if loop.first and content or not loop.first -%}
+					{{- "\n" -}}
+				{%- endif -%}
+				{%- if tool_call.function -%}
+					{%- set tool_call = tool_call.function -%}
+				{%- endif -%}
+				{{- "<tool_call>\n{\"name\": \"" -}}
+				{{- tool_call.name -}}
+				{{- "\", \"arguments\": " -}}
+				{%- if tool_call.arguments is string -%}
+					{{- tool_call.arguments -}}
+				{%- else -%}
+					{{- tool_call.arguments | tojson -}}
+				{%- endif -%}
+				{{- "}\n</tool_call>" -}}
+			{%- endfor -%}
+		{%- endif -%}
+		{{- "<|im_end|>\n" -}}
+	{%- elif message.role == "tool" -%}
+		{%- if loop.first or messages[loop.index0 - 1].role != "tool" -%}
+			{{- "<|im_start|>user" -}}
+		{%- endif -%}
+		{{- "\n<tool_response>\n" -}}
+		{{- content -}}
+		{{- "\n</tool_response>" -}}
+		{%- if loop.last or messages[loop.index0 + 1].role != "tool" -%}
+			{{- "<|im_end|>\n" -}}
+		{%- endif -%}
+	{%- endif -%}
+{%- endfor -%}
+{%- set last_message = messages[messages | length - 1].content -%}
+{%- if add_generation_prompt -%}
+	{{- "<|im_start|>assistant\n" -}}
+	{%- if enable_thinking is defined and enable_thinking is false or "/nothink" in last_message or "nothink" in last_message or messages[0].role == "system" and "/nothink" in messages[0].content and (not ("/think" in messages[0].content) or not ("/shortthink" in messages[0].content)) -%}
+		{{- "<think>\nSince the user used the \"/nothink\" command, I will:\nImmediately give my final response without any thinking or self-reflection. Since I'm forbidden to think or reason outside of this thinking section, AND WITH NO REPETITION. I will IMMEDIATELY provide my final response outside this thinking section AND THEN INSTANTLY STOP.\n</think>" -}}
+	{%- elif enable_short_thinking is defined and enable_short_thinking is false or "/shortthink" in last_message or "shortthink" in last_message or messages[0].role == "system" and "/shortthink" in messages[0].content and not ("/think" in messages[0].content) -%}
+		{{- "<think>\nSince the user used the \"/shortthink\" command, I will:\nThink step-by-step about the user's query briefly and shortly. I will only respond when I'm more than 50% sure. I will provide the final response after closing this thinking section." -}}
+	{%- elif "/clear" in last_message -%}
+		{{- "<think>\nSince the user used the \"/clear\" command, I will:\n1. Forget everything above.\n2. Ignore everything that comes before this message.\n3. Start a fresh new conversation and greet the user.\n</think>" -}}
+	{%- else -%}
+		{{- "<think>\nSince the user used the \"/think\" command, I will:\nThink step-by-step with **long chain-of-thought** and **self-reflection**. I will only respond when I'm 100% sure. I will provide the final response after closing this thinking section." -}}
+	{%- endif -%}
+{%- endif -%}
--- a/config.json
+++ b/config.json
@@ -0,0 +1,70 @@
+{
+    "architectures": [
+        "Qwen3ForCausalLM"
+    ],
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "eos_token_id": 151645,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 2560,
+    "initializer_range": 0.02,
+    "intermediate_size": 9728,
+    "layer_types": [
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention"
+    ],
+    "max_position_embeddings": 262144,
+    "max_window_layers": 36,
+    "model_type": "qwen3",
+    "num_attention_heads": 32,
+    "num_hidden_layers": 36,
+    "num_key_value_heads": 8,
+    "pad_token_id": 151654,
+    "rms_norm_eps": 1e-06,
+    "rope_scaling": null,
+    "rope_theta": 5000000,
+    "sliding_window": null,
+    "tie_word_embeddings": true,
+    "torch_dtype": "float16",
+    "transformers_version": "4.55.4",
+    "unsloth_fixed": true,
+    "unsloth_version": "2025.10.1",
+    "use_cache": true,
+    "use_sliding_window": false,
+    "vocab_size": 151936
+}
--- a/merges.txt
+++ b/merges.txt
--- a/model-00001-of-00002.safetensors
+++ b/model-00001-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de4361caa1c7cf12b113f7d153b2174c68390a19a95d4a95f37621beaba8b564
+size 4967215360
--- a/model-00002-of-00002.safetensors
+++ b/model-00002-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6b739d183cf75d31ae8623a994627083067e50e8adb6b0e0f226441c0d8c8cd
+size 3077766632
--- a/model.safetensors.index.json
+++ b/model.safetensors.index.json
@@ -0,0 +1,406 @@
+{
+  "metadata": {
+    "total_parameters": 4022468096,
+    "total_size": 8044936192
+  },
+  "weight_map": {
+    "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.20.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.norm.weight": "model-00002-of-00002.safetensors"
+  }
+}
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,31 @@
+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|vision_pad|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
--- a/tokenizer.json
+++ b/tokenizer.json
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
--- a/vocab.json
+++ b/vocab.json