From 47d17ecb1c8811a90debf5ac3f5034ef09be4b8b Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Wed, 22 Apr 2026 02:44:43 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Slerp-GGUF Source: Original Platform --- .gitattributes | 39 +++++++++ ...-4B-Qwen3.6-plus-Reasoning-Slerp-Q8_0.gguf | 3 + ...3-4B-Qwen3.6-plus-Reasoning-Slerp-f16.gguf | 3 + README.md | 85 +++++++++++++++++++ benchmark/Merged_Model.png | 3 + 5 files changed, 133 insertions(+) create mode 100644 .gitattributes create mode 100644 Qwen3-4B-Qwen3.6-plus-Reasoning-Slerp-Q8_0.gguf create mode 100644 Qwen3-4B-Qwen3.6-plus-Reasoning-Slerp-f16.gguf create mode 100644 README.md create mode 100644 benchmark/Merged_Model.png diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..ce59451 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,39 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl 
filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +Qwen3-4B-Qwen3.6-plus-Reasoning-Slerp-f16.gguf filter=lfs diff=lfs merge=lfs -text +Qwen3-4B-Qwen3.6-plus-Reasoning-Slerp-q8_0.gguf filter=lfs diff=lfs merge=lfs -text +benchmark/Merged_Model.png filter=lfs diff=lfs merge=lfs -text +Qwen3-4B-Qwen3.6-plus-Reasoning-Slerp-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/Qwen3-4B-Qwen3.6-plus-Reasoning-Slerp-Q8_0.gguf b/Qwen3-4B-Qwen3.6-plus-Reasoning-Slerp-Q8_0.gguf new file mode 100644 index 0000000..b23ffcf --- /dev/null +++ b/Qwen3-4B-Qwen3.6-plus-Reasoning-Slerp-Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aa09cc7af03ba08e4ca9788508b7c81d70a2484303c35aff36b6f31c63f08bd +size 4280404640 diff --git a/Qwen3-4B-Qwen3.6-plus-Reasoning-Slerp-f16.gguf b/Qwen3-4B-Qwen3.6-plus-Reasoning-Slerp-f16.gguf new file mode 100644 index 0000000..1d7b29a --- /dev/null +++ b/Qwen3-4B-Qwen3.6-plus-Reasoning-Slerp-f16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25376c02beae139fd8e075baa1a04c584900dfb25de3a904818cdb97d099ab93 +size 8051284640 diff --git a/README.md b/README.md new file mode 100644 index 0000000..68606fd --- /dev/null +++ b/README.md @@ -0,0 +1,85 @@ +--- +base_model: +- khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Slerp +tags: +- mergekit +- merge +license: apache-2.0 +pipeline_tag: 
text-generation +language: +- en +datasets: +- khazarai/qwen3.6-plus-high-reasoning-500x +- khazarai/kimi-2.5-high-reasoning-250x +--- + +# khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Slerp + + +![General Benchmark Comparison Chart](benchmark/Merged_Model.png) + +*Note: The sharp drop in "Creative Writing" is an expected and accepted trade-off to maximize extreme logical reasoning and coding precision.* + +This model is a highly experimental and optimized reasoning model created through a surgical SLERP merge of two powerful 4B reasoning models. The goal of this merge was to combine the deep analytical capabilities of Kimi with the mathematical and structural precision of Qwen, while mitigating the catastrophic forgetting commonly seen in SFT model merges. +After multiple iterations and layer-by-layer tensor analysis, we achieved a **"1+1=3 Synergy Effect"** in Logical Inference and Planning, outperforming both base models and the Qwen Thinking model. + + +### The "Golden Path" (V5) Strategy +Standard SLERP merges often destroy RAG capabilities and syntax adherence. To solve this, this model utilizes a custom merge configuration: + +1. **RAG/Vocabulary Fix:** `embed_tokens` and `lm_head` are strictly pinned to `1.0` (Qwen). The model reads and speaks purely using Qwen's vocabulary, completely eliminating the RAG degradation problem. +2. **Gradient Attention:** The intermediate attention and MLP layers follow a smooth gradient `[0, 0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 1]` to prevent weight interference in deep reasoning steps. 
+ +## Benchmark Performance (Multi-Domain Reasoning) + + +| Model | Score | +| :--- | :--- | +| **khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Slerp** | **77.18** | +| khazarai/Qwen3-4B-Kimi2.5-Reasoning-Distilled | 76.09 | +| khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled | 75.64 | +| Qwen/Qwen3-4B-Thinking-2507 | 73.73 | + +- **Benchmark**: khazarai/Multi-Domain-Reasoning-Benchmark +- **Total Questions**: 100 + + +## 💡 Intended Use Cases + +* **Ideal for:** Complex logical deductions, Python code debugging, mathematical problem-solving, and strict RAG (Retrieval-Augmented Generation) pipelines. +* **Not recommended for:** Creative writing, poetry, or highly imaginative storytelling. + + +### Models Merged + +The following models were included in the merge: +* [khazarai/Qwen3-4B-Kimi2.5-Reasoning-Distilled](https://huggingface.co/khazarai/Qwen3-4B-Kimi2.5-Reasoning-Distilled) +* [khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled](https://huggingface.co/khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled) + + +### Configuration + +The following YAML configuration was used to produce this model: + +```yaml + +models: + - model: khazarai/Qwen3-4B-Kimi2.5-Reasoning-Distilled + - model: khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled +merge_method: slerp +base_model: khazarai/Qwen3-4B-Kimi2.5-Reasoning-Distilled +parameters: + t: + - filter: embed_tokens + value: 1 + + - filter: lm_head + value: 1 + + - value: 1 + + - filter: self + value: [0, 0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 1] + +dtype: bfloat16 +``` \ No newline at end of file diff --git a/benchmark/Merged_Model.png b/benchmark/Merged_Model.png new file mode 100644 index 0000000..2982d3e --- /dev/null +++ b/benchmark/Merged_Model.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ab02f4b4c12f0ff5f4977f8d0ae2a6721b92ccca7ee20720e96b96c16e7b5c2 +size 157595