初始化项目，由ModelHub XC社区提供模型

Model: khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled-GGUF Source: Original Platform
2026-04-10 23:57:57 +08:00
commit f10e17a997
9 changed files with 268 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,40 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+Qwen3-4B-Thinking-2507.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen3-4B-Thinking-2507.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen3-4B-Thinking-2507.Q4_1.gguf filter=lfs diff=lfs merge=lfs -text
+benchmark/evaluatedbyLLM.png filter=lfs diff=lfs merge=lfs -text
+benchmark/BaseModel.png filter=lfs diff=lfs merge=lfs -text
--- a/54
+++ b/54
@@ -0,0 +1,54 @@
+
+FROM Qwen3-4B-Thinking-2507.Q8_0.gguf
+TEMPLATE """
+{{- $lastUserIdx := -1 -}}
+{{- range $idx, $msg := .Messages -}}
+{{- if eq $msg.Role "user" }}{{ $lastUserIdx = $idx }}{{ end -}}
+{{- end }}
+{{- if or .System .Tools }}<|im_start|>system
+{{ if .System }}
+{{ .System }}
+{{- end }}
+{{- if .Tools }}
+
+# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{{- range .Tools }}
+{"type": "function", "function": {{ .Function }}}
+{{- end }}
+</tools>
+
+For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
+<tool_call>
+{"name": <function-name>, "arguments": <args-json-object>}
+</tool_call>
+{{- end -}}
+<|im_end|>
+{{ end }}
+{{- range $i, $_ := .Messages }}
+{{- $last := eq (len (slice $.Messages $i)) 1 -}}
+{{- if eq .Role "user" }}<|im_start|>user
+{{ .Content }}<|im_end|>
+{{ else if eq .Role "assistant" }}<|im_start|>assistant
+{{ if (and $.IsThinkSet (and .Thinking (or $last (gt $i $lastUserIdx)))) -}}
+<think>{{ .Thinking }}</think>
+{{ end -}}
+{{ if .Content }}{{ .Content }}
+{{- else if .ToolCalls }}<tool_call>
+{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
+{{ end }}</tool_call>
+{{- end }}{{ if not $last }}<|im_end|>
+{{ end }}
+{{- else if eq .Role "tool" }}<|im_start|>user
+<tool_response>
+{{ .Content }}
+</tool_response><|im_end|>
+{{ end }}
+{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
+{{ end }}
+{{- end }}
+"""
--- a/Qwen3-4B-Thinking-2507.Q4_1.gguf
+++ b/Qwen3-4B-Thinking-2507.Q4_1.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34b4b6a2ecb267031c2e77d060e3c6d1f9187b92b6184dbaa1609303631b3dad
+size 2596628704
--- a/Qwen3-4B-Thinking-2507.Q6_K.gguf
+++ b/Qwen3-4B-Thinking-2507.Q6_K.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22c40e83a277d8020803c6947cf066162be2d3b4b2dccf303d5cd6ee23e437f2
+size 3306260704
--- a/Qwen3-4B-Thinking-2507.Q8_0.gguf
+++ b/Qwen3-4B-Thinking-2507.Q8_0.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48b3ba219908d335b4e34b61694d968163f05727fab579da8daac5c9fdf37eb1
+size 4280404704
--- a/README.md
+++ b/README.md
@@ -0,0 +1,87 @@
+---
+tags:
+- gguf
+- llama.cpp
+- unsloth
+license: apache-2.0
+datasets:
+- khazarai/qwen3.6-plus-high-reasoning-500x
+language:
+- en
+base_model:
+- khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled
+pipeline_tag: text-generation
+metrics:
+- accuracy
+---
+
+# Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled-GGUF : GGUF
+
+
+## Model: khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled
+
+![General Benchmark Comparison Chart](benchmark/evaluatedbyLLM.png)
+
+- **Success Rate**: 75.64%
+
+## Model: Qwen/Qwen3-4B-Thinking-2507
+
+![General Benchmark Comparison Chart](benchmark/BaseModel.png)
+
+- **Success Rate**: 73.73%
+
+- **Benchmark**: khazarai/Multi-Domain-Reasoning-Benchmark
+- **Total Questions**: 100
+
+  
+This is a reasoning-distilled variant of Qwen3-4B-Thinking, fine-tuned with QLoRA via Unsloth to replicate the advanced reasoning capabilities of the larger Qwen3.6-plus teacher model.
+The distillation process focuses on reducing the "rambling" and "uncertainty" often found in smaller models during complex tasks, replacing them with concise, structured, and actionable solution paths.
+
+## Reasoning Comparison: Base vs. Distilled
+
+The primary improvement in this model is the qualitative leap in reasoning structure. Below is a summary of the differences observed when solving complex graph problems (e.g., Shortest Path with Edge Reversals):
+
+**Base Model (Qwen3-4B-Thinking)**:
+
+- Style: Stream-of-consciousness, exploratory, and verbose.
+- Behavior: The model often talks to itself ("Hmm, interesting", "Wait, no"), struggles to interpret problem constraints correctly on the first try, and enters loops of self-correction. It mimics a student trying to figure out the problem as they speak.
+- Output: Contains a high noise-to-signal ratio; solution paths are often buried under paragraphs of hesitation.
+
+**Distilled Model (Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled)**:
+
+- Style: Structured, professional, and report-oriented.
+- Behavior: The model analyzes the problem immediately, separates concerns (Input, Output, Constraints), and formulates a concrete algorithm plan (e.g., State-Space Dijkstra). It proceeds with confidence, avoiding logical dead-ends.
+- Output: Provides a clean breakdown: Problem Analysis -> Intuition -> Algorithm -> Complexity Analysis -> Pseudocode.
+
+**Verdict**: The distilled model transforms the raw potential of the base model into an engineering-grade tool.
+
+
+## Model Specifications
+
+- **Base Model**: Qwen/Qwen3-4B-Thinking-2507
+- **Model Type**: Reasoning Distillation (QLoRA)
+- **Framework**: Unsloth
+- **Fine-tuning Method**: QLoRA (PEFT)
+- **Teacher Model**: Qwen3.6-plus
+- **Distillation Dataset**: khazarai/qwen3.6-plus-high-reasoning-500x
+  - Total Tokens: 1,739,249
+  - Max Sequence Length: 6,500 tokens
+
+## Provided Quants
+
+(sorted by size, not necessarily quality. IQ-quants are often preferable over similar sized non-IQ quants)
+
+| Type | Size/GB | Notes |
+|:-----|--------:|:------|
+| Q4_1 | 2.6 |  |
+| Q6_K | 3.3 | very good quality |
+| Q8_0 | 4.3 | fast, best quality |
+| bf16 | 8.0 | 16 bpw, overkill |
+
+Here is a handy graph by ikawrakow comparing the perplexity of some
+lower-quality quant types (lower perplexity is better):
+
+![image.png](https://www.nethype.de/huggingface_embed/quantpplgraph.png)
+
+And here are Artefact2's thoughts on the matter:
+https://gist.github.com/Artefact2/b5f810600771265fc1e39442288e8ec9
--- a/benchmark/BaseModel.png
+++ b/benchmark/BaseModel.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa98a37ffd0c8d8e6bb33782141973923e8fedb81353ca7e7ed219eecb972a30
+size 100587
--- a/benchmark/evaluatedbyLLM.png
+++ b/benchmark/evaluatedbyLLM.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:764f1590ef17221e029e1a13f764e46fcad35f3880b8554ae57a9bdfd02531a0
+size 102408
--- a/config.json
+++ b/config.json
@@ -0,0 +1,72 @@
+{
+    "architectures": [
+        "Qwen3ForCausalLM"
+    ],
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "bos_token_id": null,
+    "torch_dtype": "float16",
+    "eos_token_id": 151645,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 2560,
+    "initializer_range": 0.02,
+    "intermediate_size": 9728,
+    "layer_types": [
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention",
+        "full_attention"
+    ],
+    "max_position_embeddings": 262144,
+    "max_window_layers": 36,
+    "model_type": "qwen3",
+    "num_attention_heads": 32,
+    "num_hidden_layers": 36,
+    "num_key_value_heads": 8,
+    "pad_token_id": 151669,
+    "rms_norm_eps": 1e-06,
+    "rope_parameters": {
+        "rope_theta": 5000000,
+        "rope_type": "default"
+    },
+    "sliding_window": null,
+    "tie_word_embeddings": true,
+    "unsloth_fixed": true,
+    "unsloth_version": "2026.4.2",
+    "use_cache": false,
+    "use_sliding_window": false,
+    "vocab_size": 151936
+}