初始化项目,由ModelHub XC社区提供模型

Model: openwalrus/Qwen3-1.7B
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-18 08:09:17 +08:00
commit 169eb040d2
32 changed files with 591 additions and 0 deletions

62
.gitattributes vendored Normal file
View File

@@ -0,0 +1,62 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-UD-IQ1_S.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-UD-IQ2_M.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-UD-IQ1_M.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-UD-IQ3_XXS.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-UD-Q2_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-UD-Q4_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-UD-IQ2_XXS.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-UD-Q3_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-Q2_K_L.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-IQ4_NL.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-Q4_1.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-UD-Q5_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-UD-Q6_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-UD-Q8_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
imatrix_unsloth.dat filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-BF16.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text

3
Qwen3-1.7B-BF16.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3412cc86f04dd7b49cb5fe05249690a95d643ee0f1f980cd5e777a8f40eb37f9
size 3447349568

3
Qwen3-1.7B-IQ4_NL.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:54d4991db0f405c5d81c41a22094026cbc37bb2ee445b551bb1754204b5cb680
size 1054423616

3
Qwen3-1.7B-IQ4_XS.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a02e41d3208e97a7cb224297e8d3abb22e5bb8d664362c6be4f48948a3797eec
size 1010383424

3
Qwen3-1.7B-Q2_K.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:62b3fb705434cb57fabc59d59aa7b4c6fb558fff7c7c4b2ce67456373bc30fd3
size 777796160

3
Qwen3-1.7B-Q2_K_L.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:62b3fb705434cb57fabc59d59aa7b4c6fb558fff7c7c4b2ce67456373bc30fd3
size 777796160

3
Qwen3-1.7B-Q3_K_M.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8070f21b9763ef653289e20252592d0aab6680948f36eb5b4362d403fef09170
size 939539008

3
Qwen3-1.7B-Q3_K_S.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b6afc9f02da55f66c086ac00bc4483b7bb89bbfad87fc93602d359151362bf2d
size 867252800

3
Qwen3-1.7B-Q4_0.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c876f159707a4e4f70e045106c69db15bfc935a4981706fd4f65c6e7ea1e81c5
size 1056782912

3
Qwen3-1.7B-Q4_1.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3f6d4269abd6e61cda9962b0363f7ddc06f1186f27baa9c9e51d3dd309b02146
size 1142504000

3
Qwen3-1.7B-Q4_K_M.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b139949c5bd74937ad8ed8c8cf3d9ffb1e99c866c823204dc42c0d91fa181897
size 1107409472

3
Qwen3-1.7B-Q4_K_S.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:71eed840867db10f14b3332e3e0bf0a36b98b762c5b0bf9e091a3e00ecd21805
size 1060190784

3
Qwen3-1.7B-Q5_K_M.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b0949de5b2e06cbed6aa96517f9bd8afb334584b6f95ee83479292ff4bdd8ed3
size 1257880128

3
Qwen3-1.7B-Q5_K_S.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:43dc60ef9b7d27e43a61c05e13941503ab6d86944c306ef5207f164cb47cc982
size 1230584384

3
Qwen3-1.7B-Q6_K.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6a9cadec4883df6f3efcf337103479dcc6a3efa9f5f8cc64a904dba57808207a
size 1417755200

3
Qwen3-1.7B-Q8_0.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0becaa825564295d82e9af4d008bca5f8b7f5f73bf1c6a0b58f7c53ef26b47fd
size 1834426944

3
Qwen3-1.7B-UD-IQ1_M.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:77d1aceec4f28c45328d98ef58eefdbf5643da3513b7e777ad311b5014a58739
size 561947200

3
Qwen3-1.7B-UD-IQ1_S.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:762cff2d202b1f0af82ba840ba4a66b16b6268d10d72b7ef5bfabda080051ad3
size 537829952

3
Qwen3-1.7B-UD-IQ2_M.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0616ab2f6543a28e119bf3e7a24c5c7686b86b16fae69bea81b457c30506d358
size 708715072

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bcd3ccb440bb1e57c49e48e69c2fe790a3cd4682f8a6f128fcd1e2222102c6fc
size 605790784

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7727d3c027da39d72f846038cad89456cab0866ee4a5328f9aced19e3d850e86
size 765174336

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4a76542930ebab924832e9fe5d1970122564c026411162ee48a4d503b1a16f5f
size 797866560

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:21499d25b4dfc46066caaeef39fff5b43a4bfab80aef7c70d1db1a31ec02ed58
size 968899136

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:01977643b1d7292d09eb6ef75c7af8b084a3bbf2cb4e1d128ab76adeb8d75490
size 1132952128

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f2d3ad6203939ffcf169ac9fe4f7d7430ce227e024395850cd878fe646912c08
size 1262991936

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e7232c93d12498a74a19826c367681314d86d5e468dacc6afe24bc6adbd7754
size 1610949184

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7da8dded81945002c1024cb71cc95b6cc5013b82289353f352a47c99395de65c
size 2332582464

356
README.md Normal file
View File

@@ -0,0 +1,356 @@
---
base_model: Qwen/Qwen3-1.7B
language:
- en
library_name: transformers
license_link: https://huggingface.co/Qwen/Qwen3-1.7B/blob/main/LICENSE
license: apache-2.0
tags:
- qwen3
- qwen
- unsloth
- transformers
---
<div>
<p style="margin-bottom: 0; margin-top: 0;">
<strong>See <a href="https://huggingface.co/collections/unsloth/qwen3-680edabfb790c8c34a242f95">our collection</a> for all versions of Qwen3 including GGUF, 4-bit & 16-bit formats.</strong>
</p>
<p style="margin-bottom: 0;">
<em>Learn to run Qwen3 correctly - <a href="https://docs.unsloth.ai/basics/qwen3-how-to-run-and-fine-tune">Read our Guide</a>.</em>
</p>
<p style="margin-top: 0;margin-bottom: 0;">
<em><a href="https://docs.unsloth.ai/basics/unsloth-dynamic-v2.0-gguf">Unsloth Dynamic 2.0</a> achieves superior accuracy & outperforms other leading quants.</em>
</p>
<div style="display: flex; gap: 5px; align-items: center; ">
<a href="https://github.com/unslothai/unsloth/">
<img src="https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png" width="133">
</a>
<a href="https://discord.gg/unsloth">
<img src="https://github.com/unslothai/unsloth/raw/main/images/Discord%20button.png" width="173">
</a>
<a href="https://docs.unsloth.ai/basics/qwen3-how-to-run-and-fine-tune">
<img src="https://raw.githubusercontent.com/unslothai/unsloth/refs/heads/main/images/documentation%20green%20button.png" width="143">
</a>
</div>
<h1 style="margin-top: 0rem;">✨ Run & Fine-tune Qwen3 with Unsloth!</h1>
</div>
- Fine-tune Qwen3 (14B) for free using our Google [Colab notebook here](https://docs.unsloth.ai/get-started/unsloth-notebooks)!
- Read our Blog about Qwen3 support: [unsloth.ai/blog/qwen3](https://unsloth.ai/blog/qwen3)
- View the rest of our notebooks in our [docs here](https://docs.unsloth.ai/get-started/unsloth-notebooks).
- Run & export your fine-tuned model to Ollama, llama.cpp or HF.
| Unsloth supports | Free Notebooks | Performance | Memory use |
|-----------------|--------------------------------------------------------------------------------------------------------------------------|-------------|----------|
| **Qwen3 (14B)** | [▶️ Start on Colab](https://docs.unsloth.ai/get-started/unsloth-notebooks) | 3x faster | 70% less |
| **GRPO with Qwen3 (8B)** | [▶️ Start on Colab](https://docs.unsloth.ai/get-started/unsloth-notebooks) | 3x faster | 80% less |
| **Llama-3.2 (3B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(1B_and_3B)-Conversational.ipynb) | 2.4x faster | 58% less |
| **Llama-3.2 (11B vision)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(11B)-Vision.ipynb) | 2x faster | 60% less |
| **Qwen2.5 (7B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen2.5_(7B)-Alpaca.ipynb) | 2x faster | 60% less |
| **Phi-4 (14B)** | [▶️ Start on Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Phi_4-Conversational.ipynb) | 2x faster | 50% less |
# To Switch Between Thinking and Non-Thinking
If you are using llama.cpp, Ollama, Open WebUI etc., you can add `/think` and `/no_think` to user prompts or system messages to switch the model's thinking mode from turn to turn. The model will follow the most recent instruction in multi-turn conversations.
Here is an example of multi-turn conversation:
```
> Who are you /no_think
<think>
</think>
I am Qwen, a large-scale language model developed by Alibaba Cloud. [...]
> How many 'r's are in 'strawberries'? /think
<think>
Okay, let's see. The user is asking how many times the letter 'r' appears in the word "strawberries". [...]
</think>
The word strawberries contains 3 instances of the letter r. [...]
```
# Qwen3-1.7B
## Qwen3 Highlights
Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support, with the following key features:
- **Uniquely support of seamless switching between thinking mode** (for complex logical reasoning, math, and coding) and **non-thinking mode** (for efficient, general-purpose dialogue) **within single model**, ensuring optimal performance across various scenarios.
- **Significantly enhancement in its reasoning capabilities**, surpassing previous QwQ (in thinking mode) and Qwen2.5 instruct models (in non-thinking mode) on mathematics, code generation, and commonsense logical reasoning.
- **Superior human preference alignment**, excelling in creative writing, role-playing, multi-turn dialogues, and instruction following, to deliver a more natural, engaging, and immersive conversational experience.
- **Expertise in agent capabilities**, enabling precise integration with external tools in both thinking and unthinking modes and achieving leading performance among open-source models in complex agent-based tasks.
- **Support of 100+ languages and dialects** with strong capabilities for **multilingual instruction following** and **translation**.
## Model Overview
**Qwen3-1.7B** has the following features:
- Type: Causal Language Models
- Training Stage: Pretraining & Post-training
- Number of Parameters: 1.7B
- Number of Paramaters (Non-Embedding): 1.4B
- Number of Layers: 28
- Number of Attention Heads (GQA): 16 for Q and 8 for KV
- Context Length: 32,768
For more details, including benchmark evaluation, hardware requirements, and inference performance, please refer to our [blog](https://qwenlm.github.io/blog/qwen3/), [GitHub](https://github.com/QwenLM/Qwen3), and [Documentation](https://qwen.readthedocs.io/en/latest/).
## Quickstart
The code of Qwen3 has been in the latest Hugging Face `transformers` and we advise you to use the latest version of `transformers`.
With `transformers<4.51.0`, you will encounter the following error:
```
KeyError: 'qwen3'
```
The following contains a code snippet illustrating how to use the model generate content based on given inputs.
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
model_name = "Qwen/Qwen3-1.7B"
# load the tokenizer and the model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype="auto",
device_map="auto"
)
# prepare the model input
prompt = "Give me a short introduction to large language model."
messages = [
{"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
enable_thinking=True # Switches between thinking and non-thinking modes. Default is True.
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
# conduct text completion
generated_ids = model.generate(
**model_inputs,
max_new_tokens=32768
)
output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
# parsing thinking content
try:
# rindex finding 151668 (</think>)
index = len(output_ids) - output_ids[::-1].index(151668)
except ValueError:
index = 0
thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
print("thinking content:", thinking_content)
print("content:", content)
```
For deployment, you can use `vllm>=0.8.5` or `sglang>=0.4.5.post2` to create an OpenAI-compatible API endpoint:
- vLLM:
```shell
vllm serve Qwen/Qwen3-1.7B --enable-reasoning --reasoning-parser deepseek_r1
```
- SGLang:
```shell
python -m sglang.launch_server --model-path Qwen/Qwen3-1.7B --reasoning-parser deepseek-r1
```
## Switching Between Thinking and Non-Thinking Mode
> [!TIP]
> The `enable_thinking` switch is also available in APIs created by vLLM and SGLang.
> Please refer to [our documentation](https://qwen.readthedocs.io/) for more details.
### `enable_thinking=True`
By default, Qwen3 has thinking capabilities enabled, similar to QwQ-32B. This means the model will use its reasoning abilities to enhance the quality of generated responses. For example, when explicitly setting `enable_thinking=True` or leaving it as the default value in `tokenizer.apply_chat_template`, the model will engage its thinking mode.
```python
text = tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
enable_thinking=True # True is the default value for enable_thinking
)
```
In this mode, the model will generate think content wrapped in a `<think>...</think>` block, followed by the final response.
> [!NOTE]
> For thinking mode, use `Temperature=0.6`, `TopP=0.95`, `TopK=20`, and `MinP=0` (the default setting in `generation_config.json`). **DO NOT use greedy decoding**, as it can lead to performance degradation and endless repetitions. For more detailed guidance, please refer to the [Best Practices](#best-practices) section.
### `enable_thinking=False`
We provide a hard switch to strictly disable the model's thinking behavior, aligning its functionality with the previous Qwen2.5-Instruct models. This mode is particularly useful in scenarios where disabling thinking is essential for enhancing efficiency.
```python
text = tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
enable_thinking=False # Setting enable_thinking=False disables thinking mode
)
```
In this mode, the model will not generate any think content and will not include a `<think>...</think>` block.
> [!NOTE]
> For non-thinking mode, we suggest using `Temperature=0.7`, `TopP=0.8`, `TopK=20`, and `MinP=0`. For more detailed guidance, please refer to the [Best Practices](#best-practices) section.
### Advanced Usage: Switching Between Thinking and Non-Thinking Modes via User Input
We provide a soft switch mechanism that allows users to dynamically control the model's behavior when `enable_thinking=True`. Specifically, you can add `/think` and `/no_think` to user prompts or system messages to switch the model's thinking mode from turn to turn. The model will follow the most recent instruction in multi-turn conversations.
Here is an example of a multi-turn conversation:
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
class QwenChatbot:
def __init__(self, model_name="Qwen/Qwen3-1.7B"):
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
self.model = AutoModelForCausalLM.from_pretrained(model_name)
self.history = []
def generate_response(self, user_input):
messages = self.history + [{"role": "user", "content": user_input}]
text = self.tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True
)
inputs = self.tokenizer(text, return_tensors="pt")
response_ids = self.model.generate(**inputs, max_new_tokens=32768)[0][len(inputs.input_ids[0]):].tolist()
response = self.tokenizer.decode(response_ids, skip_special_tokens=True)
# Update history
self.history.append({"role": "user", "content": user_input})
self.history.append({"role": "assistant", "content": response})
return response
# Example Usage
if __name__ == "__main__":
chatbot = QwenChatbot()
# First input (without /think or /no_think tags, thinking mode is enabled by default)
user_input_1 = "How many r's in strawberries?"
print(f"User: {user_input_1}")
response_1 = chatbot.generate_response(user_input_1)
print(f"Bot: {response_1}")
print("----------------------")
# Second input with /no_think
user_input_2 = "Then, how many r's in blueberries? /no_think"
print(f"User: {user_input_2}")
response_2 = chatbot.generate_response(user_input_2)
print(f"Bot: {response_2}")
print("----------------------")
# Third input with /think
user_input_3 = "Really? /think"
print(f"User: {user_input_3}")
response_3 = chatbot.generate_response(user_input_3)
print(f"Bot: {response_3}")
```
> **Note**
> For API compatibility, when `enable_thinking=True`, regardless of whether the user uses `/think` or `/no_think`, the model will always output a block wrapped in `<think>...</think>`. However, the content inside this block may be empty if thinking is disabled.
> When `enable_thinking=False`, the soft switches are not valid. Regardless of any `/think` or `/no_think` tags input by the user, the model will not generate think content and will not include a `<think>...</think>` block.
## Agentic Use
Qwen3 excels in tool calling capabilities. We recommend using [Qwen-Agent](https://github.com/QwenLM/Qwen-Agent) to make the best use of agentic ability of Qwen3. Qwen-Agent encapsulates tool-calling templates and tool-calling parsers internally, greatly reducing coding complexity.
To define the available tools, you can use the MCP configuration file, use the integrated tool of Qwen-Agent, or integrate other tools by yourself.
```python
from qwen_agent.agents import Assistant
# Define LLM
llm_cfg = {
'model': 'Qwen3-1.7B',
# Use the endpoint provided by Alibaba Model Studio:
# 'model_type': 'qwen_dashscope',
# 'api_key': os.getenv('DASHSCOPE_API_KEY'),
# Use a custom endpoint compatible with OpenAI API:
'model_server': 'http://localhost:8000/v1', # api_base
'api_key': 'EMPTY',
# Other parameters:
# 'generate_cfg': {
# # Add: When the response content is `<think>this is the thought</think>this is the answer;
# # Do not add: When the response has been separated by reasoning_content and content.
# 'thought_in_content': True,
# },
}
# Define Tools
tools = [
{'mcpServers': { # You can specify the MCP configuration file
'time': {
'command': 'uvx',
'args': ['mcp-server-time', '--local-timezone=Asia/Shanghai']
},
"fetch": {
"command": "uvx",
"args": ["mcp-server-fetch"]
}
}
},
'code_interpreter', # Built-in tools
]
# Define Agent
bot = Assistant(llm=llm_cfg, function_list=tools)
# Streaming generation
messages = [{'role': 'user', 'content': 'https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen'}]
for responses in bot.run(messages=messages):
pass
print(responses)
```
## Best Practices
To achieve optimal performance, we recommend the following settings:
1. **Sampling Parameters**:
- For thinking mode (`enable_thinking=True`), use `Temperature=0.6`, `TopP=0.95`, `TopK=20`, and `MinP=0`. **DO NOT use greedy decoding**, as it can lead to performance degradation and endless repetitions.
- For non-thinking mode (`enable_thinking=False`), we suggest using `Temperature=0.7`, `TopP=0.8`, `TopK=20`, and `MinP=0`.
- For supported frameworks, you can adjust the `presence_penalty` parameter between 0 and 2 to reduce endless repetitions. However, using a higher value may occasionally result in language mixing and a slight decrease in model performance.
2. **Adequate Output Length**: We recommend using an output length of 32,768 tokens for most queries. For benchmarking on highly complex problems, such as those found in math and programming competitions, we suggest setting the max output length to 38,912 tokens. This provides the model with sufficient space to generate detailed and comprehensive responses, thereby enhancing its overall performance.
3. **Standardize Output Format**: We recommend using prompts to standardize model outputs when benchmarking.
- **Math Problems**: Include "Please reason step by step, and put your final answer within \boxed{}." in the prompt.
- **Multiple-Choice Questions**: Add the following JSON structure to the prompt to standardize responses: "Please show your choice in the `answer` field with only the choice letter, e.g., `"answer": "C"`."
4. **No Thinking Content in History**: In multi-turn conversations, the historical model output should only include the final output part and does not need to include the thinking content. It is implemented in the provided chat template in Jinja2. However, for frameworks that do not directly use the Jinja2 chat template, it is up to the developers to ensure that the best practice is followed.
### Citation
If you find our work helpful, feel free to give us a cite.
```
@misc{qwen3,
title = {Qwen3},
url = {https://qwenlm.github.io/blog/qwen3/},
author = {Qwen Team},
month = {April},
year = {2025}
}
```

31
config.json Normal file
View File

@@ -0,0 +1,31 @@
{
"architectures": [
"Qwen3ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"eos_token_id": 151645,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.02,
"intermediate_size": 6144,
"max_position_embeddings": 40960,
"max_window_layers": 28,
"model_type": "qwen3",
"num_attention_heads": 16,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"pad_token_id": 151654,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 1000000,
"sliding_window": null,
"tie_word_embeddings": true,
"torch_dtype": "bfloat16",
"transformers_version": "4.52.0.dev0",
"unsloth_fixed": true,
"use_cache": true,
"use_sliding_window": false,
"vocab_size": 151936
}

3
imatrix_unsloth.dat Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e85aa84d17c17a4d9d3add356d675c9cdcadd7a7bf5871580a92e266df0da803
size 2070884

11
params Normal file
View File

@@ -0,0 +1,11 @@
{
"stop": [
"<|im_start|>",
"<|im_end|>"
],
"temperature": 0.6,
"min_p" : 0.00,
"repeat_penalty" : 1.0,
"top_k" : 20,
"top_p" : 0.95
}

50
template Normal file
View File

@@ -0,0 +1,50 @@
{{- if .Messages }}
{{- if or .System .Tools }}<|im_start|>system
{{- if .System }}
{{ .System }}
{{- end }}
{{- if .Tools }}
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{{- range .Tools }}
{"type": "function", "function": {{ .Function }}}
{{- end }}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>
{{- end }}<|im_end|>
{{ end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 -}}
{{- if eq .Role "user" }}<|im_start|>user
{{ .Content }}<|im_end|>
{{ else if eq .Role "assistant" }}<|im_start|>assistant
{{ if .Content }}{{ .Content }}
{{- else if .ToolCalls }}<tool_call>
{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
{{ end }}</tool_call>
{{- end }}{{ if not $last }}<|im_end|>
{{ end }}
{{- else if eq .Role "tool" }}<|im_start|>user
<tool_response>
{{ .Content }}
</tool_response><|im_end|>
{{ end }}
{{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
{{ end }}
{{- end }}
{{- else }}
{{- if .System }}<|im_start|>system
{{ .System }}<|im_end|>
{{ end }}{{ if .Prompt }}<|im_start|>user
{{ .Prompt }}<|im_end|>
{{ end }}<|im_start|>assistant
{{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}