初始化项目，由ModelHub XC社区提供模型

Model: khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled-GGUF Source: Original Platform
2026-04-10 23:57:57 +08:00
commit f10e17a997
9 changed files with 268 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,40 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 *.npy filter=lfs diff=lfs merge=lfs -text
 *.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pickle filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 Qwen3-4B-Thinking-2507.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
 Qwen3-4B-Thinking-2507.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
 Qwen3-4B-Thinking-2507.Q4_1.gguf filter=lfs diff=lfs merge=lfs -text
 benchmark/evaluatedbyLLM.png filter=lfs diff=lfs merge=lfs -text
 benchmark/BaseModel.png filter=lfs diff=lfs merge=lfs -text
--- a/54
+++ b/54
@@ -0,0 +1,54 @@
 FROM Qwen3-4B-Thinking-2507.Q8_0.gguf
 TEMPLATE """
 {{- $lastUserIdx := -1 -}}
 {{- range $idx, $msg := .Messages -}}
 {{- if eq $msg.Role "user" }}{{ $lastUserIdx = $idx }}{{ end -}}
 {{- end }}
 {{- if or .System .Tools }}<|im_start|>system
 {{ if .System }}
 {{ .System }}
 {{- end }}
 {{- if .Tools }}
 # Tools
 You may call one or more functions to assist with the user query.
 You are provided with function signatures within <tools></tools> XML tags:
 <tools>
 {{- range .Tools }}
 {"type": "function", "function": {{ .Function }}}
 {{- end }}
 </tools>
 For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
 <tool_call>
 {"name": <function-name>, "arguments": <args-json-object>}
 </tool_call>
 {{- end -}}
 <|im_end|>
 {{ end }}
 {{- range $i, $_ := .Messages }}
 {{- $last := eq (len (slice $.Messages $i)) 1 -}}
 {{- if eq .Role "user" }}<|im_start|>user
 {{ .Content }}<|im_end|>
 {{ else if eq .Role "assistant" }}<|im_start|>assistant
 {{ if (and $.IsThinkSet (and .Thinking (or $last (gt $i $lastUserIdx)))) -}}
 <think>{{ .Thinking }}</think>
 {{ end -}}
 {{ if .Content }}{{ .Content }}
 {{- else if .ToolCalls }}<tool_call>
 {{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
 {{ end }}</tool_call>
 {{- end }}{{ if not $last }}<|im_end|>
 {{ end }}
 {{- else if eq .Role "tool" }}<|im_start|>user
 <tool_response>
 {{ .Content }}
 </tool_response><|im_end|>
 {{ end }}
 {{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
 {{ end }}
 {{- end }}
 """
--- a/Qwen3-4B-Thinking-2507.Q4_1.gguf
+++ b/Qwen3-4B-Thinking-2507.Q4_1.gguf
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:34b4b6a2ecb267031c2e77d060e3c6d1f9187b92b6184dbaa1609303631b3dad
 size 2596628704
--- a/Qwen3-4B-Thinking-2507.Q6_K.gguf
+++ b/Qwen3-4B-Thinking-2507.Q6_K.gguf
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:22c40e83a277d8020803c6947cf066162be2d3b4b2dccf303d5cd6ee23e437f2
 size 3306260704
--- a/Qwen3-4B-Thinking-2507.Q8_0.gguf
+++ b/Qwen3-4B-Thinking-2507.Q8_0.gguf
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:48b3ba219908d335b4e34b61694d968163f05727fab579da8daac5c9fdf37eb1
 size 4280404704
--- a/README.md
+++ b/README.md
@@ -0,0 +1,87 @@
 ---
 tags:
 - gguf
 - llama.cpp
 - unsloth
 license: apache-2.0
 datasets:
 - khazarai/qwen3.6-plus-high-reasoning-500x
 language:
 - en
 base_model:
 - khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled
 pipeline_tag: text-generation
 metrics:
 - accuracy
 ---
 # Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled-GGUF : GGUF
 ## Model: khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled
 ![General Benchmark Comparison Chart](benchmark/evaluatedbyLLM.png)
 - **Success Rate**: 75.64%
 ## Model: Qwen/Qwen3-4B-Thinking-2507
 ![General Benchmark Comparison Chart](benchmark/BaseModel.png)
 - **Success Rate**: 73.73%
 - **Benchmark**: khazarai/Multi-Domain-Reasoning-Benchmark
 - **Total Questions**: 100
 This is a reasoning-distilled variant of Qwen3-4B-Thinking, fine-tuned using LoRA via Unsloth to replicate the advanced reasoning capabilities of the larger Qwen3.6-plus teacher model.
 The distillation process focuses on reducing the "rambling" and "uncertainty" often found in smaller models during complex tasks, replacing them with concise, structured, and actionable solution paths.
 ## Reasoning Comparison: Base vs. Distilled
 The primary improvement in this model is the qualitative leap in reasoning structure. Below is a summary of the differences observed when solving complex graph problems (e.g., Shortest Path with Edge Reversals):
 **Base Model (Qwen3-4B-Thinking)**:
 - Style: Stream-of-consciousness, exploratory, and verbose.
 - Behavior: The model often talks to itself ("Hmm, interesting", "Wait, no"), struggles to interpret problem constraints correctly on the first try, and enters loops of self-correction. It mimics a student trying to figure out the problem as they speak.
 - Output: Contains a high noise-to-signal ratio; solution paths are often buried under paragraphs of hesitation.
 **Distilled Model (Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled)**:
 - Style: Structured, professional, and report-oriented.
 - Behavior: The model analyzes the problem immediately, separates concerns (Input, Output, Constraints), and formulates a concrete algorithm plan (e.g., State-Space Dijkstra). It proceeds with confidence, avoiding logical dead-ends.
 - Output: Provides a clean breakdown: Problem Analysis -> Intuition -> Algorithm -> Complexity Analysis -> Pseudocode.
 **Verdict**: The distilled model transforms the raw potential of the base model into an engineering-grade tool.
 ## Model Specifications
 - **Base Model**: Qwen/Qwen3-4B-Thinking-2507
 - **Model Type**: Reasoning Distillation (QLoRA)
 - **Framework**: Unsloth
 - **Fine-tuning Method**: QLoRA (PEFT)
 - **Teacher Model**: Qwen3.6-plus
 - **Distillation Dataset**: khazarai/qwen3.6-plus-high-reasoning-500x
  - Total Tokens: 1,739,249
  - Max Sequence Length: 6,500 tokens
 ## Provided Quants
 (sorted by size, not necessarily quality. IQ-quants are often preferable over similar sized non-IQ quants)
 | Type | Size/GB | Notes |
 |:-----|--------:|:------|
 | Q4_1 | 2.6 |  |
 | Q6_K | 3.3 | very good quality |
 | Q8_0 | 4.3 | fast, best quality |
 | bf16 | 8.0 | 16 bpw, overkill |
 Here is a handy graph by ikawrakow comparing some lower-quality quant
 types (lower is better):
 ![image.png](https://www.nethype.de/huggingface_embed/quantpplgraph.png)
 And here are Artefact2's thoughts on the matter:
 https://gist.github.com/Artefact2/b5f810600771265fc1e39442288e8ec9
--- a/benchmark/BaseModel.png
+++ b/benchmark/BaseModel.png
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:fa98a37ffd0c8d8e6bb33782141973923e8fedb81353ca7e7ed219eecb972a30
 size 100587
--- a/benchmark/evaluatedbyLLM.png
+++ b/benchmark/evaluatedbyLLM.png
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:764f1590ef17221e029e1a13f764e46fcad35f3880b8554ae57a9bdfd02531a0
 size 102408
--- a/config.json
+++ b/config.json
@@ -0,0 +1,72 @@
 {
    "architectures": [
        "Qwen3ForCausalLM"
    ],
    "attention_bias": false,
    "attention_dropout": 0.0,
    "bos_token_id": null,
    "torch_dtype": "float16",
    "eos_token_id": 151645,
    "head_dim": 128,
    "hidden_act": "silu",
    "hidden_size": 2560,
    "initializer_range": 0.02,
    "intermediate_size": 9728,
    "layer_types": [
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention",
        "full_attention"
    ],
    "max_position_embeddings": 262144,
    "max_window_layers": 36,
    "model_type": "qwen3",
    "num_attention_heads": 32,
    "num_hidden_layers": 36,
    "num_key_value_heads": 8,
    "pad_token_id": 151669,
    "rms_norm_eps": 1e-06,
    "rope_parameters": {
        "rope_theta": 5000000,
        "rope_type": "default"
    },
    "sliding_window": null,
    "tie_word_embeddings": true,
    "unsloth_fixed": true,
    "unsloth_version": "2026.4.2",
    "use_cache": false,
    "use_sliding_window": false,
    "vocab_size": 151936
 }