commit f10e17a99788f0a4be9fb33dfb69877671a9f2df Author: ModelHub XC Date: Fri Apr 10 23:57:57 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled-GGUF Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..2830dad --- /dev/null +++ b/.gitattributes @@ -0,0 +1,40 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +Qwen3-4B-Thinking-2507.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text 
+Qwen3-4B-Thinking-2507.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +Qwen3-4B-Thinking-2507.Q4_1.gguf filter=lfs diff=lfs merge=lfs -text +benchmark/evaluatedbyLLM.png filter=lfs diff=lfs merge=lfs -text +benchmark/BaseModel.png filter=lfs diff=lfs merge=lfs -text diff --git a/Modelfile b/Modelfile new file mode 100644 index 0000000..eecc127 --- /dev/null +++ b/Modelfile @@ -0,0 +1,54 @@ + +FROM Qwen3-4B-Thinking-2507.Q8_0.gguf +TEMPLATE """ +{{- $lastUserIdx := -1 -}} +{{- range $idx, $msg := .Messages -}} +{{- if eq $msg.Role "user" }}{{ $lastUserIdx = $idx }}{{ end -}} +{{- end }} +{{- if or .System .Tools }}<|im_start|>system +{{ if .System }} +{{ .System }} +{{- end }} +{{- if .Tools }} + +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within <tools></tools> XML tags: +<tools> +{{- range .Tools }} +{"type": "function", "function": {{ .Function }}} +{{- end }} +</tools> + +For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags: +<tool_call> +{"name": <function-name>, "arguments": <args-json-object>} +</tool_call> +{{- end -}} +<|im_end|> +{{ end }} +{{- range $i, $_ := .Messages }} +{{- $last := eq (len (slice $.Messages $i)) 1 -}} +{{- if eq .Role "user" }}<|im_start|>user +{{ .Content }}<|im_end|> +{{ else if eq .Role "assistant" }}<|im_start|>assistant +{{ if (and $.IsThinkSet (and .Thinking (or $last (gt $i $lastUserIdx)))) -}} +<think>{{ .Thinking }}</think> +{{ end -}} +{{ if .Content }}{{ .Content }} +{{- else if .ToolCalls }}<tool_call> +{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}} +{{ end }}</tool_call> +{{- end }}{{ if not $last }}<|im_end|> +{{ end }} +{{- else if eq .Role "tool" }}<|im_start|>user +<tool_response> +{{ .Content }} +</tool_response><|im_end|> +{{ end }} +{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant +{{ end }} +{{- end }} +""" \ No newline at end of file diff --git a/Qwen3-4B-Thinking-2507.Q4_1.gguf b/Qwen3-4B-Thinking-2507.Q4_1.gguf new file mode 100644 index 0000000..8f3f25d --- /dev/null +++ 
b/Qwen3-4B-Thinking-2507.Q4_1.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34b4b6a2ecb267031c2e77d060e3c6d1f9187b92b6184dbaa1609303631b3dad +size 2596628704 diff --git a/Qwen3-4B-Thinking-2507.Q6_K.gguf b/Qwen3-4B-Thinking-2507.Q6_K.gguf new file mode 100644 index 0000000..64c74ec --- /dev/null +++ b/Qwen3-4B-Thinking-2507.Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22c40e83a277d8020803c6947cf066162be2d3b4b2dccf303d5cd6ee23e437f2 +size 3306260704 diff --git a/Qwen3-4B-Thinking-2507.Q8_0.gguf b/Qwen3-4B-Thinking-2507.Q8_0.gguf new file mode 100644 index 0000000..e72c6c0 --- /dev/null +++ b/Qwen3-4B-Thinking-2507.Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48b3ba219908d335b4e34b61694d968163f05727fab579da8daac5c9fdf37eb1 +size 4280404704 diff --git a/README.md b/README.md new file mode 100644 index 0000000..6ca6295 --- /dev/null +++ b/README.md @@ -0,0 +1,87 @@ +--- +tags: +- gguf +- llama.cpp +- unsloth +license: apache-2.0 +datasets: +- khazarai/qwen3.6-plus-high-reasoning-500x +language: +- en +base_model: +- khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled +pipeline_tag: text-generation +metrics: +- accuracy +--- + +# Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled-GGUF : GGUF + + +## Model: khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled + +![alt="General Benchmark Comparison Chart"](benchmark/evaluatedbyLLM.png) + +- **Success Rate**: 75.64% + +## Model: Qwen/Qwen3-4B-Thinking-2507 + +![alt="General Benchmark Comparison Chart"](benchmark/BaseModel.png) + +- **Success Rate**: 73.73% + +- **Benchmark**: khazarai/Multi-Domain-Reasoning-Benchmark +- **Total Questions**: 100 + + +This is a reasoning-distilled variant of Qwen3-4B-Thinking, fine-tuned using LoRA via Unsloth to replicate the advanced reasoning capabilities of the larger Qwen3.6-plus teacher model. 
+The distillation process focuses on reducing the "rambling" and "uncertainty" often found in smaller models during complex tasks, replacing them with concise, structured, and actionable solution paths. + +## Reasoning Comparison: Base vs. Distilled + +The primary improvement in this model is the qualitative leap in reasoning structure. Below is a summary of the differences observed when solving complex graph problems (e.g., Shortest Path with Edge Reversals): + +**Base Model (Qwen3-4B-Thinking)**: + +- Style: Stream-of-consciousness, exploratory, and verbose. +- Behavior: The model often talks to itself ("Hmm, interesting", "Wait, no"), struggles to interpret problem constraints correctly on the first try, and enters loops of self-correction. It mimics a student trying to figure out the problem as they speak. +- Output: Contains high noise-to-signal ratio; solution paths are often buried under paragraphs of hesitation. + +**Distilled Model (Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled)**: + +- Style: Structured, professional, and report-oriented. +- Behavior: The model analyzes the problem immediately, separates concerns (Input, Output, Constraints), and formulates a concrete algorithm plan (e.g., State-Space Dijkstra). It proceeds with confidence, avoiding logical dead-ends. +- Output: Provides a clean breakdown: Problem Analysis -> Intuition -> Algorithm -> Complexity Analysis -> Pseudocode. + +**Verdict**: The distilled model transforms the raw potential of the base model into an engineering-grade tool. + + +## Model Specifications + +- **Base Model**: Qwen/Qwen3-4B-Thinking-2507 +- **Model Type**: Reasoning Distillation (QLoRA) +- **Framework**: Unsloth +- **Fine-tuning Method**: QLoRA (PEFT) +- **Teacher Model**: Qwen3.6-plus +- **Distillation Dataset**: khazarai/qwen3.6-plus-high-reasoning-500x + - Total Tokens: 1,739,249 + - Max Sequence Length: 6,500 tokens + +## Provided Quants + +(sorted by size, not necessarily quality. 
IQ-quants are often preferable over similar sized non-IQ quants) + +| Type | Size/GB | Notes | +|:-----|--------:|:------| +| Q4_1 | 2.6 | | +| Q6_K | 3.3 | very good quality | +| Q8_0 | 4.2 | fast, best quality | +| bf16 | 8.0 | 16 bpw, overkill | + +Here is a handy graph by ikawrakow comparing some lower-quality quant +types (lower is better): + +![image.png](https://www.nethype.de/huggingface_embed/quantpplgraph.png) + +And here are Artefact2's thoughts on the matter: +https://gist.github.com/Artefact2/b5f810600771265fc1e39442288e8ec9 \ No newline at end of file diff --git a/benchmark/BaseModel.png b/benchmark/BaseModel.png new file mode 100644 index 0000000..ccf978e --- /dev/null +++ b/benchmark/BaseModel.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa98a37ffd0c8d8e6bb33782141973923e8fedb81353ca7e7ed219eecb972a30 +size 100587 diff --git a/benchmark/evaluatedbyLLM.png b/benchmark/evaluatedbyLLM.png new file mode 100644 index 0000000..db48051 --- /dev/null +++ b/benchmark/evaluatedbyLLM.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:764f1590ef17221e029e1a13f764e46fcad35f3880b8554ae57a9bdfd02531a0 +size 102408 diff --git a/config.json b/config.json new file mode 100644 index 0000000..f7e0b5c --- /dev/null +++ b/config.json @@ -0,0 +1,72 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": null, + "torch_dtype": "float16", + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + 
"full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 262144, + "max_window_layers": 36, + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "pad_token_id": 151669, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 5000000, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "unsloth_version": "2026.4.2", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file