From 2cb94282fb0a1a064a02d056903b5710b6baaffe Mon Sep 17 00:00:00 2001
From: ModelHub XC <noreply@modelhub.org.cn>
Date: Wed, 17 Jun 2026 14:51:17 +0800
Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?=
 =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?=
 =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Model: Igriscodes/qwen-tool-gguf
Source: Original Platform
---
 .gitattributes              |  46 ++++++++
 README.md                   | 204 ++++++++++++++++++++++++++++++++++++
 qwen3-1.7b-tool-f16.gguf    |   3 +
 qwen3-1.7b-tool-q2_k.gguf   |   3 +
 qwen3-1.7b-tool-q3_k_m.gguf |   3 +
 qwen3-1.7b-tool-q4_0.gguf   |   3 +
 qwen3-1.7b-tool-q4_k_m.gguf |   3 +
 qwen3-1.7b-tool-q5_0.gguf   |   3 +
 qwen3-1.7b-tool-q5_k_m.gguf |   3 +
 qwen3-1.7b-tool-q6_k.gguf   |   3 +
 qwen3-1.7b-tool-q8_0.gguf   |   3 +
 11 files changed, 277 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 README.md
 create mode 100644 qwen3-1.7b-tool-f16.gguf
 create mode 100644 qwen3-1.7b-tool-q2_k.gguf
 create mode 100644 qwen3-1.7b-tool-q3_k_m.gguf
 create mode 100644 qwen3-1.7b-tool-q4_0.gguf
 create mode 100644 qwen3-1.7b-tool-q4_k_m.gguf
 create mode 100644 qwen3-1.7b-tool-q5_0.gguf
 create mode 100644 qwen3-1.7b-tool-q5_k_m.gguf
 create mode 100644 qwen3-1.7b-tool-q6_k.gguf
 create mode 100644 qwen3-1.7b-tool-q8_0.gguf

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..6a33b2a
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,46 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+qwen-tool-f16.gguf filter=lfs diff=lfs merge=lfs -text
+qwen-tool-q4.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-1.7b-tool-f16.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-1.7b-tool-q2_k.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-1.7b-tool-q3_k_m.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-1.7b-tool-q4_0.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-1.7b-tool-q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-1.7b-tool-q5_0.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-1.7b-tool-q5_k_m.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-1.7b-tool-q6_k.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-1.7b-tool-q8_0.gguf filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e68ab26
--- /dev/null
+++ b/README.md
@@ -0,0 +1,204 @@
+---
+license: mpl-2.0
+base_model: Igriscodes/qwen3-1.7b-tool
+tags:
+- tool-use
+- function-calling
+- reinforcement-learning
+- mcp
+- gguf
+- quantized
+pipeline_tag: text-generation
+language:
+- en
+---
+
+# Qwen3-1.7B-Agentic-MCP-RL - GGUF
+
+This repository contains the GGUF quantization files for [Igriscodes/qwen-tool](https://huggingface.co/Igriscodes/qwen-tool), a fine-tuned `Qwen/Qwen3-1.7B` model optimized for multi-step tool use and structured payload delivery via the **Model Context Protocol (MCP)**. 
+
+The base model was aligned using **Proximal Policy Optimization (PPO)** on strict JSON validation, execution tracking, and tool-error recovery loops. These GGUF files allow for low-latency, low-memory local inference on edge devices, CPU-only systems, and Apple Silicon.
+
+## Available Quantizations
+
+* **Q2_K**: Maximum compression. Significant loss in reasoning quality; not recommended for complex tool use, but fits on ultra-low-memory devices.
+* **Q3_K_M**: Balanced 3-bit compression. Better logic than Q2, suitable for highly constrained memory footprints.
+* **Q4_0**: Standard legacy 4-bit quantization. Faster on certain older hardware architectures but slightly lower quality than K-quants.
+* **Q4_K_M**: **Recommended.** Optimal balance of reasoning performance, generation speed, and VRAM savings.
+* **Q5_0**: Standard legacy 5-bit quantization. Good middle ground, but outpaced by K-quants.
+* **Q5_K_M**: High quality 5-bit compression. Retains nearly all unquantized capabilities while saving substantial VRAM.
+* **Q6_K**: 6-bit quantization. Near-zero degradation from F16 while shaving off a decent chunk of file size.
+* **Q8_0**: Maximum 8-bit fidelity. Extremely close to native F16 performance, ideal for strict syntax and reliable tool-calling.
+* **F16**: Unquantized baseline. Full-fidelity, native performance for systems with more memory headroom.
+
+## Local Deployment Quickstart
+
+### Using Ollama
+Ollama supports running models directly from Hugging Face via the `hf.co` registry prefix. You can pull and run your preferred precision instantly:
+
+```bash
+# Q2_K (Extreme compression)
+ollama run hf.co/Igriscodes/qwen3-1.7b-tool-gguf:Q2_K
+
+# Q3_K_M (Medium 3-bit)
+ollama run hf.co/Igriscodes/qwen3-1.7b-tool-gguf:Q3_K_M
+
+# Q4_0 (Legacy 4-bit)
+ollama run hf.co/Igriscodes/qwen3-1.7b-tool-gguf:Q4_0
+
+# Q4_K_M (Recommended balanced version)
+ollama run hf.co/Igriscodes/qwen3-1.7b-tool-gguf:Q4_K_M
+
+# Q5_0 (Legacy 5-bit)
+ollama run hf.co/Igriscodes/qwen3-1.7b-tool-gguf:Q5_0
+
+# Q5_K_M (High-fidelity 5-bit)
+ollama run hf.co/Igriscodes/qwen3-1.7b-tool-gguf:Q5_K_M
+
+# Q6_K (Deep 6-bit)
+ollama run hf.co/Igriscodes/qwen3-1.7b-tool-gguf:Q6_K
+
+# Q8_0 (Near-lossless 8-bit)
+ollama run hf.co/Igriscodes/qwen3-1.7b-tool-gguf:Q8_0
+
+# F16 (High-fidelity unquantized float version)
+ollama run hf.co/Igriscodes/qwen3-1.7b-tool-gguf:F16
+
+```
+
+**Alternatively**, download a GGUF file and build a custom Ollama model from it, as described below.
+
+## Ollama Setup Guide
+
+To run this model locally with full tool-calling (function calling) and thinking capabilities, you can easily package it into an **Ollama** model using the provided template configuration.
+
+### 1. Create the Modelfile
+
+Save the configuration block below exactly as a file named `Modelfile` in the same directory where your downloaded GGUF file is located.
+
+> 💡 **Note:** If you are using a different quantization format than the `q4_k_m` example below, make sure to update the `FROM` line to match your exact `.gguf` filename.
+
+```text
+# Point to your quantized GGUF file
+FROM ./qwen3-1.7b-tool-q4_k_m.gguf
+
+# Custom template optimizing tool-use syntax and thought blocks
+TEMPLATE """{{- $lastUserIdx := -1 -}}
+{{- range $idx, $msg := .Messages -}}
+{{- if eq $msg.Role "user" }}{{ $lastUserIdx = $idx }}{{ end -}}
+{{- end }}
+{{- if or .System .Tools }}<|im_start|>system
+{{ if .System }}{{ .System }}
+
+{{ end }}
+{{- if .Tools }}# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{{- range .Tools }}
+{"type": "function", "function": {{ .Function }}}
+{{- end }}
+</tools>
+
+For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
+<tool_call>
+{"name": <function-name>, "arguments": <args-json-object>}
+</tool_call>
+{{- end -}}
+<|im_end|>
+{{ end }}
+{{- range $i, $_ := .Messages }}
+{{- $last := eq (len (slice $.Messages $i)) 1 -}}
+{{- if eq .Role "user" }}<|im_start|>user
+{{ .Content }}<|im_end|>
+{{ else if eq .Role "assistant" }}<|im_start|>assistant
+{{ if (and $.IsThinkSet (and .Thinking (or $last (gt $i $lastUserIdx)))) -}}
+<think>{{ .Thinking }}</think>
+{{ end -}}
+{{ if .Content }}{{ .Content }}{{ end }}
+{{- if .ToolCalls }}
+{{- range .ToolCalls }}
+<tool_call>
+{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
+</tool_call>
+{{- end }}
+{{- end }}{{ if not $last }}<|im_end|>
+{{ end }}
+{{- else if eq .Role "tool" }}<|im_start|>user
+<tool_response>
+{{ .Content }}
+</tool_response><|im_end|>
+{{ end }}
+{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
+<think>
+{{ end }}
+{{- end }}"""
+
+# Inference parameters optimized for structured reasoning
+PARAMETER temperature 0.6
+PARAMETER num_ctx 8192
+PARAMETER num_gpu -1
+PARAMETER top_k 20
+PARAMETER top_p 0.95
+PARAMETER repeat_penalty 1
+PARAMETER stop <|im_start|>
+PARAMETER stop <|im_end|>
+
+```
+
+### 2. Build and Run the Model
+
+Open your terminal, navigate to the directory containing your `Modelfile` and your `.gguf` file, and execute the build command:
+
+```bash
+ollama create qwen3-1.7b-tool --file Modelfile
+
+```
+
+Once the build process completes, you can launch and interact with your new custom model natively via Ollama:
+
+```bash
+ollama run qwen3-1.7b-tool
+
+```
+
+
+### Using Python (`llama-cpp-python`)
+
+First, ensure you have the library installed:
+
+```bash
+pip install llama-cpp-python
+```
+
+Depending on your hardware constraints, you can load either the full-precision (F16) file or a quantized version using the snippets below:
+
+#### Option 1: High Fidelity (F16 Precision)
+
+```python
+from llama_cpp import Llama
+
+llm = Llama.from_pretrained(
+    repo_id="Igriscodes/qwen3-1.7b-tool-gguf",
+    filename="qwen3-1.7b-tool-f16.gguf",
+    n_ctx=2048,
+    n_gpu_layers=-1 # Use -1 to offload all layers to GPU (Metal/CUDA)
+)
+
+```
+
+#### Option 2: Low Resource (Q4 Quantization)
+
+```python
+from llama_cpp import Llama
+
+llm = Llama.from_pretrained(
+    repo_id="Igriscodes/qwen3-1.7b-tool-gguf",
+    filename="qwen3-1.7b-tool-q4_k_m.gguf",
+    n_ctx=2048,
+    n_gpu_layers=-1 # -1 offloads all layers to GPU; set to 0 for CPU-only execution
+)
+
+```
\ No newline at end of file
diff --git a/qwen3-1.7b-tool-f16.gguf b/qwen3-1.7b-tool-f16.gguf
new file mode 100644
index 0000000..02ec280
--- /dev/null
+++ b/qwen3-1.7b-tool-f16.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91c228e54a650f1e48413569dad3370fffe0262cc7629e1955f712ed5b55fbf1
+size 3447348928
diff --git a/qwen3-1.7b-tool-q2_k.gguf b/qwen3-1.7b-tool-q2_k.gguf
new file mode 100644
index 0000000..a424956
--- /dev/null
+++ b/qwen3-1.7b-tool-q2_k.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a064c8cad8303648044bae93a59dc18191320504c8711d7e5f8d9b915dee843e
+size 777795264
diff --git a/qwen3-1.7b-tool-q3_k_m.gguf b/qwen3-1.7b-tool-q3_k_m.gguf
new file mode 100644
index 0000000..3c89dad
--- /dev/null
+++ b/qwen3-1.7b-tool-q3_k_m.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:787d98b33a589e891bd2105e807b5003a3a80335b7b082740c89bf7f0c37aa28
+size 939538112
diff --git a/qwen3-1.7b-tool-q4_0.gguf b/qwen3-1.7b-tool-q4_0.gguf
new file mode 100644
index 0000000..637bb6c
--- /dev/null
+++ b/qwen3-1.7b-tool-q4_0.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6aeb846a335598f027bc43981a8613b4594138cf08a7a5ae18616fb6a6d84d30
+size 1054422720
diff --git a/qwen3-1.7b-tool-q4_k_m.gguf b/qwen3-1.7b-tool-q4_k_m.gguf
new file mode 100644
index 0000000..ebf6980
--- /dev/null
+++ b/qwen3-1.7b-tool-q4_k_m.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7971b1cc6b70c219db23e576d57fceec58dde3c62f355715938473941402b76f
+size 1107408576
diff --git a/qwen3-1.7b-tool-q5_0.gguf b/qwen3-1.7b-tool-q5_0.gguf
new file mode 100644
index 0000000..5ba9a49
--- /dev/null
+++ b/qwen3-1.7b-tool-q5_0.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4d52059713b0b8006dadefe5dcd577dc3bab54ceef1d4d0eb3828d7c6dcc0cd
+size 1230583488
diff --git a/qwen3-1.7b-tool-q5_k_m.gguf b/qwen3-1.7b-tool-q5_k_m.gguf
new file mode 100644
index 0000000..271a619
--- /dev/null
+++ b/qwen3-1.7b-tool-q5_k_m.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95375dcc3fcb69f22adbe0db591fe10c26f96f1c692604320ba4bd4313e8b454
+size 1257879232
diff --git a/qwen3-1.7b-tool-q6_k.gguf b/qwen3-1.7b-tool-q6_k.gguf
new file mode 100644
index 0000000..72fd8e6
--- /dev/null
+++ b/qwen3-1.7b-tool-q6_k.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59d8a6c5fa54030475a03d3f5c0e1bdc0d39bac8b66508119128b55c818e4772
+size 1417754304
diff --git a/qwen3-1.7b-tool-q8_0.gguf b/qwen3-1.7b-tool-q8_0.gguf
new file mode 100644
index 0000000..53c0f16
--- /dev/null
+++ b/qwen3-1.7b-tool-q8_0.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afefb3d64477bdb5ca0e9506411738d544e6f796d077ce8a4a529580c6ce9baf
+size 1834426048