commit bcb53a3ee2a849d063bd8f2de0c30d7778a6a7fe Author: ModelHub XC Date: Fri Jun 19 16:03:14 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: prithivMLmods/Qwen3-8B-GGUF Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..53d7257 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,47 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs 
merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/Qwen3_8B.BF16.gguf b/Qwen3_8B.BF16.gguf new file mode 100644 index 0000000..6321e9a --- /dev/null +++ b/Qwen3_8B.BF16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:714402b1bb855aaf85d4f2a0a9fdb6a2cd8bd03f5b0729210e2e800239a75654 +size 16388044000 diff --git a/Qwen3_8B.F16.gguf b/Qwen3_8B.F16.gguf new file mode 100644 index 0000000..9047aae --- /dev/null +++ b/Qwen3_8B.F16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb2ba27d57fc860ad4b619a4368435f958eb95bf849c3ee6b70a4d72691db36 +size 16388044000 diff --git a/Qwen3_8B.F32.gguf b/Qwen3_8B.F32.gguf new file mode 100644 index 0000000..22c059b --- /dev/null +++ b/Qwen3_8B.F32.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e7a522deca08856138a0512ab83b882a2a21515ac5d01a665313a2b5ca9b4ad +size 32768898272 diff --git a/Qwen3_8B.Q2_K.gguf b/Qwen3_8B.Q2_K.gguf new file mode 100644 index 0000000..4ba542b --- /dev/null +++ b/Qwen3_8B.Q2_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7e469b14c3595bcdbe838468862d48a105e334939fbc9ba91de198f0225393a +size 3281732832 diff --git a/Qwen3_8B.Q3_K_M.gguf b/Qwen3_8B.Q3_K_M.gguf new file mode 100644 index 0000000..0caf3f6 --- /dev/null +++ b/Qwen3_8B.Q3_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c336b216db74800137b9a6c3b8e3ef59f507fc0427387db0375fccdd3462f6f8 +size 4124161248 diff --git a/Qwen3_8B.Q3_K_S.gguf b/Qwen3_8B.Q3_K_S.gguf 
new file mode 100644 index 0000000..8810912 --- /dev/null +++ b/Qwen3_8B.Q3_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5247ef9c67d57b2faf3f5589dc0c18c1e050ddce0cbb399e6857f74084f5197f +size 3769611488 diff --git a/Qwen3_8B.Q4_K_M.gguf b/Qwen3_8B.Q4_K_M.gguf new file mode 100644 index 0000000..1c15331 --- /dev/null +++ b/Qwen3_8B.Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61b080c5126803440357b37cfb7f44be2a9da41a36154963b9b7382d60fec316 +size 5027783904 diff --git a/Qwen3_8B.Q4_K_S.gguf b/Qwen3_8B.Q4_K_S.gguf new file mode 100644 index 0000000..a2601b9 --- /dev/null +++ b/Qwen3_8B.Q4_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0a57e766da7ac52327606f73b73cf3dc4e60bda9571b9743a77c87e76fdb702 +size 4802012384 diff --git a/Qwen3_8B.Q5_K_M.gguf b/Qwen3_8B.Q5_K_M.gguf new file mode 100644 index 0000000..7bf7af2 --- /dev/null +++ b/Qwen3_8B.Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc2f40012f37fa3e3cbeb2db69ffb0dd2fc2e98b4a5232601f3b9f99569b8e96 +size 5851112672 diff --git a/Qwen3_8B.Q8_0.gguf b/Qwen3_8B.Q8_0.gguf new file mode 100644 index 0000000..bad826f --- /dev/null +++ b/Qwen3_8B.Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da68b0bd54d8221208a75cb29322a51c564a4468dd86a8fde4cf58cec1ff7e84 +size 8709518560 diff --git a/README.md b/README.md new file mode 100644 index 0000000..a44e236 --- /dev/null +++ b/README.md @@ -0,0 +1,58 @@ +--- +license: apache-2.0 +language: +- en +base_model: +- Qwen/Qwen3-8B +pipeline_tag: text-generation +library_name: transformers +tags: +- text-generation-inference +- moe +--- + +# **Qwen3-8B-GGUF** + +> Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. 
Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support. + +## Model Files + +| File Name | Size | Quantization | Format | Description | +| ---------------------- | ------- | ------------ | ------ | -------------------------------- | +| `Qwen3_8B.F32.gguf` | 32.8 GB | FP32 | GGUF | Full precision (float32) version | +| `Qwen3_8B.BF16.gguf` | 16.4 GB | BF16 | GGUF | BFloat16 precision version | +| `Qwen3_8B.F16.gguf` | 16.4 GB | FP16 | GGUF | Float16 precision version | +| `Qwen3_8B.Q2_K.gguf` | 3.28 GB | Q2\_K | GGUF | 2-bit quantized (K variant) | +| `Qwen3_8B.Q3_K_M.gguf` | 4.12 GB | Q3\_K\_M | GGUF | 3-bit quantized (K M variant) | +| `Qwen3_8B.Q3_K_S.gguf` | 3.77 GB | Q3\_K\_S | GGUF | 3-bit quantized (K S variant) | +| `Qwen3_8B.Q4_K_M.gguf` | 5.03 GB | Q4\_K\_M | GGUF | 4-bit quantized (K M variant) | +| `Qwen3_8B.Q4_K_S.gguf` | 4.80 GB | Q4\_K\_S | GGUF | 4-bit quantized (K S variant) | +| `Qwen3_8B.Q5_K_M.gguf` | 5.85 GB | Q5\_K\_M | GGUF | 5-bit quantized (K M variant) | +| `Qwen3_8B.Q8_0.gguf` | 8.71 GB | Q8\_0 | GGUF | 8-bit quantized | +| `.gitattributes` | 2.08 kB | — | — | Git LFS tracking file | +| `config.json` | 31 B | — | — | Configuration placeholder | +| `README.md` | 31 B | — | — | Model documentation | + +## Quants Usage + +Note: the links and sizes in the table below refer to the Qwen3-0.6B quants in `mradermacher/Qwen3-0.6B-GGUF`, not to the 8B files listed above. (sorted by size, not necessarily quality. 
IQ-quants are often preferable over similarly sized non-IQ quants) + +| Link | Type | Size/GB | Notes | +|:-----|:-----|--------:|:------| +| [GGUF](https://huggingface.co/mradermacher/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B.Q2_K.gguf) | Q2_K | 0.4 | | +| [GGUF](https://huggingface.co/mradermacher/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B.Q3_K_S.gguf) | Q3_K_S | 0.5 | | +| [GGUF](https://huggingface.co/mradermacher/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B.Q3_K_M.gguf) | Q3_K_M | 0.5 | lower quality | +| [GGUF](https://huggingface.co/mradermacher/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B.Q3_K_L.gguf) | Q3_K_L | 0.5 | | +| [GGUF](https://huggingface.co/mradermacher/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B.IQ4_XS.gguf) | IQ4_XS | 0.6 | | +| [GGUF](https://huggingface.co/mradermacher/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B.Q4_K_S.gguf) | Q4_K_S | 0.6 | fast, recommended | +| [GGUF](https://huggingface.co/mradermacher/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B.Q4_K_M.gguf) | Q4_K_M | 0.6 | fast, recommended | +| [GGUF](https://huggingface.co/mradermacher/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B.Q5_K_S.gguf) | Q5_K_S | 0.6 | | +| [GGUF](https://huggingface.co/mradermacher/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B.Q5_K_M.gguf) | Q5_K_M | 0.7 | | +| [GGUF](https://huggingface.co/mradermacher/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B.Q6_K.gguf) | Q6_K | 0.7 | very good quality | +| [GGUF](https://huggingface.co/mradermacher/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B.Q8_0.gguf) | Q8_0 | 0.9 | fast, best quality | +| [GGUF](https://huggingface.co/mradermacher/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B.f16.gguf) | f16 | 1.6 | 16 bpw, overkill | + +Here is a handy graph by ikawrakow comparing some lower-quality quant +types (lower is better): + +![image.png](https://www.nethype.de/huggingface_embed/quantpplgraph.png) \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..d1b8b99 --- /dev/null +++ b/config.json @@ -0,0 +1,3 @@ +{ + "model_type": "qwen3" 
+} \ No newline at end of file diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file