初始化项目,由ModelHub XC社区提供模型
Model: khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled-GGUF Source: Original Platform
This commit is contained in:
40
.gitattributes
vendored
Normal file
40
.gitattributes
vendored
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
Qwen3-4B-Thinking-2507.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
Qwen3-4B-Thinking-2507.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
Qwen3-4B-Thinking-2507.Q4_1.gguf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
benchmark/evaluatedbyLLM.png filter=lfs diff=lfs merge=lfs -text
|
||||||
|
benchmark/BaseModel.png filter=lfs diff=lfs merge=lfs -text
|
||||||
54
Modelfile
Normal file
54
Modelfile
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
|
||||||
|
FROM Qwen3-4B-Thinking-2507.Q8_0.gguf
|
||||||
|
TEMPLATE """
|
||||||
|
{{- $lastUserIdx := -1 -}}
|
||||||
|
{{- range $idx, $msg := .Messages -}}
|
||||||
|
{{- if eq $msg.Role "user" }}{{ $lastUserIdx = $idx }}{{ end -}}
|
||||||
|
{{- end }}
|
||||||
|
{{- if or .System .Tools }}<|im_start|>system
|
||||||
|
{{ if .System }}
|
||||||
|
{{ .System }}
|
||||||
|
{{- end }}
|
||||||
|
{{- if .Tools }}
|
||||||
|
|
||||||
|
# Tools
|
||||||
|
|
||||||
|
You may call one or more functions to assist with the user query.
|
||||||
|
|
||||||
|
You are provided with function signatures within <tools></tools> XML tags:
|
||||||
|
<tools>
|
||||||
|
{{- range .Tools }}
|
||||||
|
{"type": "function", "function": {{ .Function }}}
|
||||||
|
{{- end }}
|
||||||
|
</tools>
|
||||||
|
|
||||||
|
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
|
||||||
|
<tool_call>
|
||||||
|
{"name": <function-name>, "arguments": <args-json-object>}
|
||||||
|
</tool_call>
|
||||||
|
{{- end -}}
|
||||||
|
<|im_end|>
|
||||||
|
{{ end }}
|
||||||
|
{{- range $i, $_ := .Messages }}
|
||||||
|
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
|
||||||
|
{{- if eq .Role "user" }}<|im_start|>user
|
||||||
|
{{ .Content }}<|im_end|>
|
||||||
|
{{ else if eq .Role "assistant" }}<|im_start|>assistant
|
||||||
|
{{ if (and $.IsThinkSet (and .Thinking (or $last (gt $i $lastUserIdx)))) -}}
|
||||||
|
<think>{{ .Thinking }}</think>
|
||||||
|
{{ end -}}
|
||||||
|
{{ if .Content }}{{ .Content }}
|
||||||
|
{{- else if .ToolCalls }}<tool_call>
|
||||||
|
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
|
||||||
|
{{ end }}</tool_call>
|
||||||
|
{{- end }}{{ if not $last }}<|im_end|>
|
||||||
|
{{ end }}
|
||||||
|
{{- else if eq .Role "tool" }}<|im_start|>user
|
||||||
|
<tool_response>
|
||||||
|
{{ .Content }}
|
||||||
|
</tool_response><|im_end|>
|
||||||
|
{{ end }}
|
||||||
|
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
|
||||||
|
{{ end }}
|
||||||
|
{{- end }}
|
||||||
|
"""
|
||||||
3
Qwen3-4B-Thinking-2507.Q4_1.gguf
Normal file
3
Qwen3-4B-Thinking-2507.Q4_1.gguf
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:34b4b6a2ecb267031c2e77d060e3c6d1f9187b92b6184dbaa1609303631b3dad
|
||||||
|
size 2596628704
|
||||||
3
Qwen3-4B-Thinking-2507.Q6_K.gguf
Normal file
3
Qwen3-4B-Thinking-2507.Q6_K.gguf
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:22c40e83a277d8020803c6947cf066162be2d3b4b2dccf303d5cd6ee23e437f2
|
||||||
|
size 3306260704
|
||||||
3
Qwen3-4B-Thinking-2507.Q8_0.gguf
Normal file
3
Qwen3-4B-Thinking-2507.Q8_0.gguf
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:48b3ba219908d335b4e34b61694d968163f05727fab579da8daac5c9fdf37eb1
|
||||||
|
size 4280404704
|
||||||
87
README.md
Normal file
87
README.md
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
---
|
||||||
|
tags:
|
||||||
|
- gguf
|
||||||
|
- llama.cpp
|
||||||
|
- unsloth
|
||||||
|
license: apache-2.0
|
||||||
|
datasets:
|
||||||
|
- khazarai/qwen3.6-plus-high-reasoning-500x
|
||||||
|
language:
|
||||||
|
- en
|
||||||
|
base_model:
|
||||||
|
- khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled
|
||||||
|
pipeline_tag: text-generation
|
||||||
|
metrics:
|
||||||
|
- accuracy
|
||||||
|
---
|
||||||
|
|
||||||
|
# Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled-GGUF
|
||||||
|
|
||||||
|
|
||||||
|
## Model: khazarai/Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
- **Success Rate**: 75.64%
|
||||||
|
|
||||||
|
## Model: Qwen/Qwen3-4B-Thinking-2507
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
- **Success Rate**: 73.73%
|
||||||
|
|
||||||
|
- **Benchmark**: khazarai/Multi-Domain-Reasoning-Benchmark
|
||||||
|
- **Total Questions**: 100
|
||||||
|
|
||||||
|
|
||||||
|
This is a reasoning-distilled variant of Qwen3-4B-Thinking-2507, fine-tuned with QLoRA via Unsloth to replicate the advanced reasoning capabilities of the larger Qwen3.6-plus teacher model.
|
||||||
|
The distillation process focuses on reducing the "rambling" and "uncertainty" often found in smaller models during complex tasks, replacing them with concise, structured, and actionable solution paths.
|
||||||
|
|
||||||
|
## Reasoning Comparison: Base vs. Distilled
|
||||||
|
|
||||||
|
The primary improvement in this model is the qualitative leap in reasoning structure. Below is a summary of the differences observed when solving complex graph problems (e.g., Shortest Path with Edge Reversals):
|
||||||
|
|
||||||
|
**Base Model (Qwen3-4B-Thinking)**:
|
||||||
|
|
||||||
|
- Style: Stream-of-consciousness, exploratory, and verbose.
|
||||||
|
- Behavior: The model often talks to itself ("Hmm, interesting", "Wait, no"), struggles to interpret problem constraints correctly on the first try, and enters loops of self-correction. It mimics a student trying to figure out the problem as they speak.
|
||||||
|
- Output: Has a low signal-to-noise ratio; solution paths are often buried under paragraphs of hesitation.
|
||||||
|
|
||||||
|
**Distilled Model (Qwen3-4B-Qwen3.6-plus-Reasoning-Distilled)**:
|
||||||
|
|
||||||
|
- Style: Structured, professional, and report-oriented.
|
||||||
|
- Behavior: The model analyzes the problem immediately, separates concerns (Input, Output, Constraints), and formulates a concrete algorithm plan (e.g., State-Space Dijkstra). It proceeds with confidence, avoiding logical dead-ends.
|
||||||
|
- Output: Provides a clean breakdown: Problem Analysis -> Intuition -> Algorithm -> Complexity Analysis -> Pseudocode.
|
||||||
|
|
||||||
|
**Verdict**: The distilled model transforms the raw potential of the base model into an engineering-grade tool.
|
||||||
|
|
||||||
|
|
||||||
|
## Model Specifications
|
||||||
|
|
||||||
|
- **Base Model**: Qwen/Qwen3-4B-Thinking-2507
|
||||||
|
- **Model Type**: Reasoning Distillation (QLoRA)
|
||||||
|
- **Framework**: Unsloth
|
||||||
|
- **Fine-tuning Method**: QLoRA (PEFT)
|
||||||
|
- **Teacher Model**: Qwen3.6-plus
|
||||||
|
- **Distillation Dataset**: khazarai/qwen3.6-plus-high-reasoning-500x
|
||||||
|
- Total Tokens: 1,739,249
|
||||||
|
- Max Sequence Length: 6,500 tokens
|
||||||
|
|
||||||
|
## Provided Quants
|
||||||
|
|
||||||
|
(sorted by size, not necessarily quality. IQ-quants are often preferable over similar-sized non-IQ quants)
|
||||||
|
|
||||||
|
| Type | Size/GB | Notes |
|
||||||
|
|:-----|--------:|:------|
|
||||||
|
| Q4_1 | 2.6 | |
|
||||||
|
| Q6_K | 3.3 | very good quality |
|
||||||
|
| Q8_0 | 4.3 | fast, best quality |
|
||||||
|
| bf16 | 8.0 | 16 bpw, overkill |
|
||||||
|
|
||||||
|
Here is a handy graph by ikawrakow comparing some lower-quality quant
|
||||||
|
types (lower is better):
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
And here are Artefact2's thoughts on the matter:
|
||||||
|
https://gist.github.com/Artefact2/b5f810600771265fc1e39442288e8ec9
|
||||||
3
benchmark/BaseModel.png
Normal file
3
benchmark/BaseModel.png
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:fa98a37ffd0c8d8e6bb33782141973923e8fedb81353ca7e7ed219eecb972a30
|
||||||
|
size 100587
|
||||||
3
benchmark/evaluatedbyLLM.png
Normal file
3
benchmark/evaluatedbyLLM.png
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:764f1590ef17221e029e1a13f764e46fcad35f3880b8554ae57a9bdfd02531a0
|
||||||
|
size 102408
|
||||||
72
config.json
Normal file
72
config.json
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
{
|
||||||
|
"architectures": [
|
||||||
|
"Qwen3ForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_bias": false,
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"bos_token_id": null,
|
||||||
|
"torch_dtype": "float16",
|
||||||
|
"eos_token_id": 151645,
|
||||||
|
"head_dim": 128,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 2560,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 9728,
|
||||||
|
"layer_types": [
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention"
|
||||||
|
],
|
||||||
|
"max_position_embeddings": 262144,
|
||||||
|
"max_window_layers": 36,
|
||||||
|
"model_type": "qwen3",
|
||||||
|
"num_attention_heads": 32,
|
||||||
|
"num_hidden_layers": 36,
|
||||||
|
"num_key_value_heads": 8,
|
||||||
|
"pad_token_id": 151669,
|
||||||
|
"rms_norm_eps": 1e-06,
|
||||||
|
"rope_parameters": {
|
||||||
|
"rope_theta": 5000000,
|
||||||
|
"rope_type": "default"
|
||||||
|
},
|
||||||
|
"sliding_window": null,
|
||||||
|
"tie_word_embeddings": true,
|
||||||
|
"unsloth_fixed": true,
|
||||||
|
"unsloth_version": "2026.4.2",
|
||||||
|
"use_cache": false,
|
||||||
|
"use_sliding_window": false,
|
||||||
|
"vocab_size": 151936
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user