From 9580d30532920376c992c8db65a9507ac83146e3 Mon Sep 17 00:00:00 2001
From: ModelHub XC <noreply@modelhub.org.cn>
Date: Tue, 12 May 2026 08:24:33 +0800
Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?=
 =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?=
 =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Model: Daizee/Nanbeige4.1-3B-heretic
Source: Original Platform
---
 .gitattributes                   |  36 ++++
 README.md                        | 222 +++++++++++++++++++++++
 added_tokens.json                |   9 +
 chat_template.jinja              | 137 ++++++++++++++
 config.json                      |  32 ++++
 generation_config.json           |   7 +
 model-00001-of-00002.safetensors |   3 +
 model-00002-of-00002.safetensors |   3 +
 model.safetensors.index.json     | 299 +++++++++++++++++++++++++++++++
 special_tokens_map.json          |  33 ++++
 tokenizer.json                   |   3 +
 tokenizer.model                  |   3 +
 tokenizer_config.json            | 102 +++++++++++
 13 files changed, 889 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 README.md
 create mode 100644 added_tokens.json
 create mode 100644 chat_template.jinja
 create mode 100644 config.json
 create mode 100644 generation_config.json
 create mode 100644 model-00001-of-00002.safetensors
 create mode 100644 model-00002-of-00002.safetensors
 create mode 100644 model.safetensors.index.json
 create mode 100644 special_tokens_map.json
 create mode 100644 tokenizer.json
 create mode 100644 tokenizer.model
 create mode 100644 tokenizer_config.json

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..52373fe
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..abc7399
--- /dev/null
+++ b/README.md
@@ -0,0 +1,222 @@
+---
+license: apache-2.0
+language:
+- en
+- zh
+library_name: transformers
+pipeline_tag: text-generation
+tags:
+- llm
+- nanbeige
+- heretic
+- uncensored
+- decensored
+- abliterated
+base_model:
+- Nanbeige/Nanbeige4-3B-Base
+---
+# This is a decensored version of [Nanbeige/Nanbeige4.1-3B](https://huggingface.co/Nanbeige/Nanbeige4.1-3B), made using [Heretic](https://github.com/p-e-w/heretic) v1.2.0
+
+## Abliteration parameters
+
+| Parameter | Value |
+| :-------- | :---: |
+| **direction_index** | 13.45 |
+| **attn.o_proj.max_weight** | 1.02 |
+| **attn.o_proj.max_weight_position** | 22.93 |
+| **attn.o_proj.min_weight** | 0.73 |
+| **attn.o_proj.min_weight_distance** | 6.77 |
+| **mlp.down_proj.max_weight** | 1.25 |
+| **mlp.down_proj.max_weight_position** | 29.81 |
+| **mlp.down_proj.min_weight** | 0.99 |
+| **mlp.down_proj.min_weight_distance** | 18.35 |
+
+## Performance
+
+| Metric | This model | Original model ([Nanbeige/Nanbeige4.1-3B](https://huggingface.co/Nanbeige/Nanbeige4.1-3B)) |
+| :----- | :--------: | :---------------------------: |
+| **KL divergence** | 0.0011 | 0 *(by definition)* |
+| **Refusals** | 1/100 | 97/100 |
+
+-----
+
+<div align="center">
+
+<img src="figures/nbg.png" width="220" alt="Nanbeige Logo">
+</div>
+
+
+
+# Introduction
+
+Nanbeige4.1-3B is built upon Nanbeige4-3B-Base and represents an enhanced iteration of our previous reasoning model, Nanbeige4-3B-Thinking-2511, achieved through further post-training optimization with supervised fine-tuning (SFT) and reinforcement learning (RL). As a highly competitive open-source model at a small parameter scale, Nanbeige4.1-3B illustrates that compact models can simultaneously achieve robust **reasoning**, **preference alignment**, and **effective agentic behaviors**.
+
+<div align="center">
+
+<img src="figures/model_performance_comparison.png">
+</div>
+
+Specifically, Nanbeige4.1-3B exhibits the following key strengths:
+
+* **Strong Reasoning:** Nanbeige4.1-3B is capable of solving complex, multi-step problems through sustained and coherent reasoning within a single forward pass, and reliably produces correct final answers on challenging tasks such as LiveCodeBench-Pro, IMO-Answer-Bench, and AIME 2026 I.
+* **Robust Preference Alignment:** Nanbeige4.1-3B achieves solid alignment performance, outperforming not only same-scale models such as Qwen3-4B-2507 and Nanbeige4-3B-2511, but also substantially larger models including Qwen3-30B-A3B and Qwen3-32B on Arena-Hard-v2 and Multi-Challenge.
+* **Agentic Capability:** Nanbeige4.1-3B is the first general small model to natively support deep-search tasks and reliably sustain complex problem solving involving more than 500 rounds of tool invocations. It fills a long-standing gap in the small-model ecosystem where models are typically optimized for either general reasoning or agentic scenarios, but rarely excel at both.
+  
+> **Technical Report:** [Link](https://huggingface.co/Nanbeige/Nanbeige4.1-3B/blob/main/Nanbeige4.1-3B-Report.pdf)
+
+
+
+
+# Performances
+
+We evaluate Nanbeige4.1-3B across a broad and diverse set of benchmarks covering **general reasoning**, and **deep-search capabilities**. 
+
+### General Reasoning Tasks
+
+On general reasoning tasks including **code**, **math**, **science**, **alignment**, and **tool-use** benchmarks, Nanbeige4.1-3B not only significantly outperforms same-scale models such as **Qwen3-4B**, but also demonstrates overall superior performance compared to larger models including **Qwen3-30B-A3B-2507** and **Qwen3-32B**.
+
+
+| Benchmark                   | Qwen3-4B-2507 | Qwen3-8B | Qwen3-14B | Qwen3-32B | Qwen3-30B-A3B-2507 | Nanbeige4-3B-2511 | **Nanbeige4.1-3B** |
+| --------------------------- | ------------- | -------- | --------- | --------- | ------------------ | ----------------- | ------------------ |
+| **Code**                    |               |          |           |           |                    |                   |                    |
+| Live-Code-Bench-V6          | 57.4          |  49.4    |   55.9    | 55.7      | <u>66.0<u>         | 46.0              | **76.9**           |
+| Live-Code-Bench-Pro-Easy    | 40.2             |   41.2   |   33.0    | 42.3      | <u>60.8<u>         | 40.2              | **81.4**           |
+| Live-Code-Bench-Pro-Mediium | 5.3             |   3.5    |    1.8    | 3.5       | 3.5                | <u>5.3<u>         | **28.1**           |
+| **Math**                    |               |          |           |           |                    |                   |                    |
+| AIME 2026 I                   | 81.46         | 70.42    | 76.46     | 75.83     | <u>87.30<u>        | 84.1              | **87.40**          |
+| HMMT Nov                    | 68.33         |  48.33   |  56.67    | 57.08     | <u>71.25<u>        | 66.67             | **77.92**          |
+| IMO-Answer-Bench            | 48.00         |   36.56  | 41.81     | 43.94     | **54.34**          | 38.25             | 53.38              |
+| **Science**                 |               |          |           |           |                    |                   |                    |
+| GPQA                        | 65.8          |   62.0   |    63.38  | 68.4      | 73.4               | <u>82.2<u>        | **83.8**           |
+| HLE (Text-only)             | 6.72          | 5.28     |   7.00    | 9.31      | <u>11.77<u>        | 10.98             | **12.60**          |
+| **Alignment**               |               |          |           |           |                    |                   |                    |
+| Arena-Hard-v2               | 34.9          |  26.3    |   36.9    | 56.0      | <u>60.2<u>         | 60.0              | **73.2**           |
+| Multi-Challenge             | 41.14         |  36.30   |   36.97   | 38.72     | <u>49.40<u>        | 41.20             | **52.21**          |
+| **Tool Use**                |               |          |           |           |                    |                   |                    |
+| BFCL-V4             | 44.87         |   42.20  |  45.14   |  47.90     |     48.6           | <u>53.8<u>        | **56.50**          |
+| Tau2-Bench                 |  45.9        | 42.06    |  44.96    |    45.26  |  <u> 47.70<u>   |    41.77         |    **48.57**           |
+
+
+
+### Deep Search Tasks
+
+As a general small model, Nanbeige4.1-3B achieves deep-search performance comparable to specialized agents under 10B parameters. 
+In contrast to existing small general models, which typically exhibit little to no deep-search capability, Nanbeige4.1-3B represents a substantial qualitative improvement over prior small general models.
+
+#### Deep Search and Agent Benchmarks
+| Model | xBench-DeepSearch-2505 | xBench-DeepSearch-2510 | Browse-Comp | Browse-Comp-ZH | GAIA (Text-only) | HLE | SEAL-0 |
+|------|-------------------|-------------------|-------------|----------------|------------------|-----|--------|
+| **Search-Specialized Small Agents** ||||||||
+| MiroThinker-v1.0-8B | 61 | – | 31.1 | 40.2 | 66.4 | 21.5 | 40.4 |
+| AgentCPM-Explore-4B | 70 | – | 25.0 | 29.0 | 63.9 | 19.1 | 40.0 |
+| **Large Foundation Models (with Tools)** ||||||||
+| GLM-4.6-357B | 70 | – | 45.1 | 49.5 | 71.9 | 30.4 | – |
+| Minimax-M2-230B | 72 | – | 44.0 | 48.5 | 75.7 | 31.8 | – |
+| DeepSeek-V3.2-671B | 71 | – | 67.6 | 65.0 | 63.5 | 40.8 | 38.5 |
+| **Small Foundation Models (with Tools)** ||||||||
+| Qwen3-4B-2507 | 34 | 5 | 1.57 | 7.92 | 28.33 | 11.13 | <u>15.74<u> |
+| Qwen3-8B | 31 | 2 | 0.79 | 5.15 | 19.53 | 10.24 | 6.34 |
+| Qwen3-14B | 34 | 9 | 2.36 | 7.11 | 30.23 | 10.17 | 12.64 |
+| Qwen3-32B | <u>39<u> | 8 | <u>3.15<u> | <u>7.34<u> | 30.17 | 9.26 | 8.15 |
+| Qwen3-30B-A3B-2507 | 25 | 10| 1.57 | 4.12 | <u>31.63<u> | <u>14.81<u> | 9.24 |
+| **Ours (with Tools)** ||||||||
+| Nanbeige4-3B-2511 | 33 | <u>11<u>  | 0.79 | 3.09 | 19.42 | 13.89 | 12.61 |
+| **Nanbeige4.1-3B** | **75** | **39** | **19.12** | **31.83** | **69.90** | **22.29** | **41.44** |
+
+
+## <span id="Inference">Quickstart</span>
+
+For inference hyperparameters, we recommend the following settings:
+* Temperature: 0.6
+* Top-p: 0.95
+* Repeat penalty: 1.0
+* Max New Tokens: 131072
+
+For the chat scenario:
+```
+from transformers import AutoModelForCausalLM, AutoTokenizer
+tokenizer = AutoTokenizer.from_pretrained(
+  'Nanbeige/Nanbeige4.1-3B',
+  use_fast=False,
+  trust_remote_code=True
+)
+model = AutoModelForCausalLM.from_pretrained(
+  'Nanbeige/Nanbeige4.1-3B',
+  torch_dtype='auto',
+  device_map='auto',
+  trust_remote_code=True
+)
+messages = [
+  {'role': 'user', 'content': 'Which number is bigger, 9.11 or 9.8?'}
+]
+prompt = tokenizer.apply_chat_template(
+  messages,
+  add_generation_prompt=True,
+  tokenize=False
+)
+input_ids = tokenizer(prompt, add_special_tokens=False, return_tensors='pt').input_ids
+output_ids = model.generate(input_ids.to('cuda'), eos_token_id=166101)
+resp = tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens=True)
+print(resp)
+```
+
+For the tool use scenario:
+```
+from transformers import AutoModelForCausalLM, AutoTokenizer
+tokenizer = AutoTokenizer.from_pretrained(
+  'Nanbeige/Nanbeige4.1-3B',
+  use_fast=False,
+  trust_remote_code=True
+)
+model = AutoModelForCausalLM.from_pretrained(
+  'Nanbeige/Nanbeige4.1-3B',
+  torch_dtype='auto',
+  device_map='auto',
+  trust_remote_code=True
+)
+messages = [
+    {'role': 'user',  'content': 'Help me check the weather in Beijing now'}
+]
+tools = [{'type': 'function',
+  'function': {'name': 'SearchWeather',
+   'description': 'Find out the current weather in a place on a certain day.',
+   'parameters': {'type': 'dict',
+    'properties': {'location': {'type': 'string',
+      'description': 'A city in China.'},
+    'required': ['location']}}}}]
+prompt = tokenizer.apply_chat_template(
+  messages,
+  tools,
+  add_generation_prompt=True,
+  tokenize=False
+)
+input_ids = tokenizer(prompt, add_special_tokens=False, return_tensors='pt').input_ids
+output_ids = model.generate(input_ids.to('cuda'), eos_token_id=166101)
+resp = tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens=True)
+print(resp)
+```
+
+For the deep-search scenario:
+
+* Inference Framework: [**miroflow-framework**](https://github.com/MiroMindAI/MiroThinker)!
+* Switch tokenizer configuration to **tokenizer_config_search.json**
+* Tools Configuration:
+
+| Server                      | Description                                                                 | Tools Provided                                                                 |
+|-----------------------------|-----------------------------------------------------------------------------|-------------------------------------------------------------------------------|
+| tool-python                 | Execution environment and file management ([E2B sandbox](https://e2b.dev/)) | create_sandbox, run_command, run_python_code, upload_file_from_local_to_sandbox, download_file_from_sandbox_to_local, download_file_from_internet_to_sandbox |
+| search_and_scrape_webpage   | Google search via [Serper API](https://google.serper.dev)                   | google_search                                                                 |
+| jina_scrape_llm_summary     | Web scraping with LLM-based information extraction with [Jina](https://r.jina.ai) | scrape_and_extract_info                                                       |
+
+* Summary model: Qwen3-14B-thinking
+* Temperature: 1.0
+* Note, access to HuggingFace has been explicitly disabled in these tools.
+
+# <span id="Limitations">Limitations</span>
+
+While we place great emphasis on the safety of the model during the training process, striving to ensure that its outputs align with ethical and legal requirements, it may not completely avoid generating unexpected outputs due to the model's size and probabilistic nature. These outputs may include harmful content such as bias or discrimination. Please don't propagate such content. We do not assume any responsibility for the consequences resulting from the dissemination of inappropriate information.
+<br>
+
+# <span id="Limitations">Contact</span>
+If you have any questions, please raise an issue or contact us at nanbeige@kanzhun.com.
+<br>
\ No newline at end of file
diff --git a/added_tokens.json b/added_tokens.json
new file mode 100644
index 0000000..8427ce9
--- /dev/null
+++ b/added_tokens.json
@@ -0,0 +1,9 @@
+{
+  "</think>": 166104,
+  "</tool_call>": 166106,
+  "<think>": 166103,
+  "<tool_call>": 166105,
+  "<|endoftext|>": 166102,
+  "<|im_end|>": 166101,
+  "<|im_start|>": 166100
+}
diff --git a/chat_template.jinja b/chat_template.jinja
new file mode 100644
index 0000000..789492b
--- /dev/null
+++ b/chat_template.jinja
@@ -0,0 +1,137 @@
+
+        {%- if tools %}
+            {{- '<|im_start|>system
+' }}
+            {%- if messages[0].role == 'system' %}
+                {{- messages[0].content + '
+
+' }}
+            {%- else %} 
+                {{- '你是一位工具函数调用专家，你会得到一个问题和一组可能的工具函数。根据问题，你需要进行一个或多个函数/工具调用以实现目的，请尽量尝试探索通过工具解决问题。
+如果没有一个函数可以使用，请直接使用自然语言回复用户。
+如果给定的问题缺少函数所需的参数，请使用自然语言进行提问，向用户询问必要信息。
+如果调用结果已经足够回答用户问题，请对历史结果进行总结，使用自然语言回复用户。' }} 
+            {%- endif %}
+            {{- "# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>" }}
+            {%- for tool in tools %}
+                {{- "
+" }}
+                {{- tool | tojson }}
+            {%- endfor %}
+            {{- "
+</tools>
+
+For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
+<tool_call>
+{\"name\": <function-name>, \"arguments\": <args-json-object>}
+</tool_call><|im_end|>
+" }}
+        {%- else %}
+            {%- if messages[0].role == 'system' %}
+                {{- '<|im_start|>system
+' + messages[0].content + '<|im_end|>
+' }}
+            {%- else %} 
+                {{- '<|im_start|>system
+你是南北阁，一款由BOSS直聘自主研发并训练的专业大语言模型。<|im_end|>
+' }} 
+            {%- endif %}
+        {%- endif %}
+        {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+        {%- for message in messages[::-1] %}
+            {%- set index = (messages|length - 1) - loop.index0 %}
+            {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+                {%- set ns.multi_step_tool = false %}
+                {%- set ns.last_query_index = index %}
+            {%- endif %}
+        {%- endfor %}
+        {%- for message in messages %}
+            {%- if message.content is string %}
+                {%- set content = message.content %}
+            {%- else %}
+                {%- set content = '' %}
+            {%- endif %}
+            {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+                {{- '<|im_start|>' + message.role + '
+' + content + '<|im_end|>' + '
+' }}
+            {%- elif message.role == "assistant" %}
+                {%- set reasoning_content = '' %}
+                {%- if message.reasoning_content is string %}
+                    {%- set reasoning_content = message.reasoning_content %}
+                {%- else %}
+                    {%- if '</think>' in content %}
+                        {%- set reasoning_content = content.split('</think>')[0].rstrip('
+').split('<think>')[-1].lstrip('
+') %}
+                        {%- set content = content.split('</think>')[-1].lstrip('
+') %}
+                    {%- endif %}
+                {%- endif %}
+                {%- if loop.index0 > ns.last_query_index or keep_all_think or (extra_body is defined and extra_body.keep_all_think) %}
+                    {%- if loop.last or (not loop.last and reasoning_content) %}
+                        {{- '<|im_start|>' + message.role + '
+<think>
+' + reasoning_content.strip('
+') + '
+</think>
+
+' + content.lstrip('
+') }}
+                    {%- else %}
+                        {{- '<|im_start|>' + message.role + '
+' + content }}
+                    {%- endif %}
+                {%- else %}
+                    {{- '<|im_start|>' + message.role + '
+' + content }}
+                {%- endif %}
+                {%- if message.tool_calls %}
+                    {%- for tool_call in message.tool_calls %}
+                        {%- if (loop.first and content) or (not loop.first) %}
+                            {{- '
+' }}
+                        {%- endif %}
+                        {%- if tool_call.function %}
+                            {%- set tool_call = tool_call.function %}
+                        {%- endif %}
+                        {{- '<tool_call>
+{"name": "' }}
+                        {{- tool_call.name }}
+                        {{- '", "arguments": ' }}
+                        {%- if tool_call.arguments is string %}
+                            {{- tool_call.arguments }}
+                        {%- else %}
+                            {{- tool_call.arguments | tojson }}
+                        {%- endif %}
+                        {{- '}
+</tool_call>' }}
+                    {%- endfor %}
+                {%- endif %}
+                {{- '<|im_end|>
+' }}
+            {%- elif message.role == "tool" %}
+                {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+                    {{- '<|im_start|>user' }}
+                {%- endif %}
+                {{- '
+<tool_response>
+' }}
+                {{- content }}
+                {{- '
+</tool_response>' }}
+                {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+                    {{- '<|im_end|>
+' }}
+                {%- endif %}
+            {%- endif %}
+        {%- endfor %}
+        {%- if add_generation_prompt %}
+            {{- '<|im_start|>assistant
+' }}
+        {%- endif %}
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..aac5946
--- /dev/null
+++ b/config.json
@@ -0,0 +1,32 @@
+{
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 166100,
+  "dtype": "bfloat16",
+  "embd_pdrop": 0.0,
+  "eos_token_id": 166101,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 10496,
+  "max_position_embeddings": 262144,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 20,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 4,
+  "pad_token_id": 0,
+  "pretraining_tp": 1,
+  "resid_pdrop": 0.0,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 70000000,
+  "tie_word_embeddings": false,
+  "transformers_version": "4.57.3",
+  "use_cache": true,
+  "vocab_size": 166144
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000..365f145
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 166100,
+  "eos_token_id": 166101,
+  "pad_token_id": 0,
+  "transformers_version": "4.57.3"
+}
diff --git a/model-00001-of-00002.safetensors b/model-00001-of-00002.safetensors
new file mode 100644
index 0000000..a86688b
--- /dev/null
+++ b/model-00001-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a51bf38906dd490ebba8d75308141caf06879cea5ab87988a7959f9bf5236cfd
+size 4982284416
diff --git a/model-00002-of-00002.safetensors b/model-00002-of-00002.safetensors
new file mode 100644
index 0000000..3db700c
--- /dev/null
+++ b/model-00002-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dab93cb5381bc6a5ec4e885815940e6d2e1594c9c55173c64b727b23c66b982
+size 2885023640
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000..90a145d
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,299 @@
+{
+  "metadata": {
+    "total_parameters": 3933637120,
+    "total_size": 7867274240
+  },
+  "weight_map": {
+    "lm_head.weight": "model-00002-of-00002.safetensors",
+    "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.norm.weight": "model-00002-of-00002.safetensors"
+  }
+}
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000..e10b31f
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,33 @@
+{
+  "additional_special_tokens": [
+    "<|endoftext|>"
+  ],
+  "bos_token": {
+    "content": "<|im_start|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000..4b71f73
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2147e0d09056cc65321353dd2990de81fbc92232a25cf1be8b52b716e5c57ffd
+size 18451276
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000..5dc56ce
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb41d04798b714520a9b075727b0226538b7330254299062742c50ec8374bc36
+size 2782298
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000..a447dab
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,102 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "166100": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "166101": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "166102": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "166103": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "166104": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "166105": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "166106": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|endoftext|>"
+  ],
+  "bos_token": "<|im_start|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "extra_special_tokens": {},
+  "legacy": true,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<unk>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}