初始化项目,由ModelHub XC社区提供模型
Model: ertghiu256/Qwen3-4b-tcomanr-merge Source: Original Platform
This commit is contained in:
44
.gitattributes
vendored
Normal file
44
.gitattributes
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tcomanr_Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
model_Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
converted-model.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
converted-model-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
model.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
model-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
model-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
model-F16.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
178
README.md
Normal file
178
README.md
Normal file
@@ -0,0 +1,178 @@
|
||||
---
|
||||
base_model:
|
||||
- ertghiu256/qwen3-4b-code-reasoning
|
||||
- Qwen/Qwen3-4B
|
||||
- Tesslate/UIGEN-T3-4B-Preview-MAX
|
||||
- ertghiu256/qwen-3-4b-mixture-of-thought
|
||||
- POLARIS-Project/Polaris-4B-Preview
|
||||
- ertghiu256/qwen3-math-reasoner
|
||||
- ertghiu256/qwen3-multi-reasoner
|
||||
- ValiantLabs/Qwen3-4B-Esper3
|
||||
- ValiantLabs/Qwen3-4B-ShiningValiant3
|
||||
- prithivMLmods/Crux-Qwen3_OpenThinking-4B
|
||||
library_name: transformers
|
||||
tags:
|
||||
- mergekit
|
||||
- merge
|
||||
- qwen3
|
||||
- think
|
||||
- reason
|
||||
- reasoning
|
||||
new_version: ertghiu256/Qwen3-4b-tcomanr-merge-v2
|
||||
---
|
||||
# Ties merged COde MAth aNd Reasoning model
|
||||
This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
|
||||
|
||||
## Merge Details
|
||||
This model aims to combine the code and math capabilities by merging multiple Qwen 3 finetunes.
|
||||
|
||||
# How to run
|
||||
You can run this model by using multiple interface choices
|
||||
|
||||
## transformers
|
||||
As the qwen team suggested to use
|
||||
```python
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
model_name = "ertghiu256/Qwen3-4b-tcomanr-merge"
|
||||
|
||||
# load the tokenizer and the model
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_name,
|
||||
torch_dtype="auto",
|
||||
device_map="auto"
|
||||
)
|
||||
|
||||
# prepare the model input
|
||||
prompt = "Give me a short introduction to large language model."
|
||||
messages = [
|
||||
{"role": "user", "content": prompt}
|
||||
]
|
||||
text = tokenizer.apply_chat_template(
|
||||
messages,
|
||||
tokenize=False,
|
||||
add_generation_prompt=True,
|
||||
enable_thinking=True # Switches between thinking and non-thinking modes. Default is True.
|
||||
)
|
||||
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
|
||||
|
||||
# conduct text completion
|
||||
generated_ids = model.generate(
|
||||
**model_inputs,
|
||||
max_new_tokens=32768
|
||||
)
|
||||
output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
|
||||
|
||||
# parsing thinking content
|
||||
try:
|
||||
# rindex finding 151668 (</think>)
|
||||
index = len(output_ids) - output_ids[::-1].index(151668)
|
||||
except ValueError:
|
||||
index = 0
|
||||
|
||||
thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
|
||||
content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")
|
||||
|
||||
print("thinking content:", thinking_content)
|
||||
print("content:", content)
|
||||
```
|
||||
|
||||
## vllm
|
||||
Run this command
|
||||
```bash
|
||||
vllm serve ertghiu256/Qwen3-4b-tcomanr-merge --enable-reasoning --reasoning-parser deepseek_r1
|
||||
```
|
||||
|
||||
## Sglang
|
||||
Run this command
|
||||
```bash
|
||||
python -m sglang.launch_server --model-path ertghiu256/Qwen3-4b-tcomanr-merge --reasoning-parser deepseek-r1
|
||||
```
|
||||
|
||||
## llama.cpp
|
||||
Run this command
|
||||
```bash
|
||||
llama-server --hf-repo ertghiu256/Qwen3-4b-tcomanr-merge
|
||||
```
|
||||
or
|
||||
```bash
|
||||
llama-cli --hf ertghiu256/Qwen3-4b-tcomanr-merge
|
||||
```
|
||||
|
||||
## ollama
|
||||
Run this command
|
||||
```bash
|
||||
ollama run hf.co/ertghiu256/Qwen3-4b-tcomanr-merge:Q4_K_M
|
||||
```
|
||||
|
||||
## lm studio
|
||||
Search
|
||||
```
|
||||
ertghiu256/Qwen3-4b-tcomanr-merge
|
||||
```
|
||||
in the lm studio model search list then download
|
||||
|
||||
### Recomended parameters
|
||||
```
|
||||
temp: 0.6
|
||||
num_ctx: ≥8192
|
||||
top_p: 0.95
|
||||
top_k: 10
|
||||
```
|
||||
|
||||
### Merge Details
|
||||
This model was merged using the [TIES](https://arxiv.org/abs/2306.01708) merge method using [Qwen/Qwen3-4B](https://huggingface.co/Qwen/Qwen3-4B) as a base.
|
||||
|
||||
#### Models:
|
||||
The following models were included in the merge:
|
||||
* [ertghiu256/qwen3-4b-code-reasoning](https://huggingface.co/ertghiu256/qwen3-4b-code-reasoning)
|
||||
* [Tesslate/UIGEN-T3-4B-Preview-MAX](https://huggingface.co/Tesslate/UIGEN-T3-4B-Preview-MAX)
|
||||
* [ertghiu256/qwen-3-4b-mixture-of-thought](https://huggingface.co/ertghiu256/qwen-3-4b-mixture-of-thought)
|
||||
* [POLARIS-Project/Polaris-4B-Preview](https://huggingface.co/POLARIS-Project/Polaris-4B-Preview)
|
||||
* [ertghiu256/qwen3-math-reasoner](https://huggingface.co/ertghiu256/qwen3-math-reasoner)
|
||||
* [ertghiu256/qwen3-multi-reasoner](https://huggingface.co/ertghiu256/qwen3-multi-reasoner)
|
||||
* [ValiantLabs/Qwen3-4B-Esper3](https://huggingface.co/ValiantLabs/Qwen3-4B-Esper3)
|
||||
* [ValiantLabs/Qwen3-4B-ShiningValiant3](https://huggingface.co/ValiantLabs/Qwen3-4B-ShiningValiant3)
|
||||
* [prithivMLmods/Crux-Qwen3_OpenThinking-4B](https://huggingface.co/prithivMLmods/Crux-Qwen3_OpenThinking-4B)
|
||||
|
||||
#### Configuration
|
||||
The following YAML configuration was used to produce this model:
|
||||
|
||||
```yaml
|
||||
models:
|
||||
- model: ertghiu256/qwen3-math-reasoner
|
||||
parameters:
|
||||
weight: 0.7
|
||||
- model: ertghiu256/qwen3-4b-code-reasoning
|
||||
parameters:
|
||||
weight: 0.8
|
||||
- model: ertghiu256/qwen-3-4b-mixture-of-thought
|
||||
parameters:
|
||||
weight: 0.9
|
||||
- model: POLARIS-Project/Polaris-4B-Preview
|
||||
parameters:
|
||||
weight: 0.7
|
||||
- model: ertghiu256/qwen3-multi-reasoner
|
||||
parameters:
|
||||
weight: 0.8
|
||||
- model: ValiantLabs/Qwen3-4B-Esper3
|
||||
parameters:
|
||||
weight: 0.8
|
||||
- model: Tesslate/UIGEN-T3-4B-Preview-MAX
|
||||
parameters:
|
||||
weight: 0.8
|
||||
- model: ValiantLabs/Qwen3-4B-ShiningValiant3
|
||||
parameters:
|
||||
weight: 0.9
|
||||
- model: prithivMLmods/Crux-Qwen3_OpenThinking-4B
|
||||
parameters:
|
||||
weight: 0.4
|
||||
merge_method: ties
|
||||
base_model: Qwen/Qwen3-4B
|
||||
parameters:
|
||||
normalize: true
|
||||
int8_mask: true
|
||||
dtype: float16
|
||||
|
||||
```
|
||||
30
config.json
Normal file
30
config.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 151643,
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 2560,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 9728,
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 36,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 36,
|
||||
"num_key_value_heads": 8,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": true,
|
||||
"torch_dtype": "float16",
|
||||
"transformers_version": "4.51.3",
|
||||
"use_cache": true,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
34
mergekit_config.yml
Normal file
34
mergekit_config.yml
Normal file
@@ -0,0 +1,34 @@
|
||||
models:
|
||||
- model: ertghiu256/qwen3-math-reasoner
|
||||
parameters:
|
||||
weight: 0.7
|
||||
- model: ertghiu256/qwen3-4b-code-reasoning
|
||||
parameters:
|
||||
weight: 0.8
|
||||
- model: ertghiu256/qwen-3-4b-mixture-of-thought
|
||||
parameters:
|
||||
weight: 0.9
|
||||
- model: POLARIS-Project/Polaris-4B-Preview
|
||||
parameters:
|
||||
weight: 0.7
|
||||
- model: ertghiu256/qwen3-multi-reasoner
|
||||
parameters:
|
||||
weight: 0.8
|
||||
- model: ValiantLabs/Qwen3-4B-Esper3
|
||||
parameters:
|
||||
weight: 0.8
|
||||
- model: Tesslate/UIGEN-T3-4B-Preview-MAX
|
||||
parameters:
|
||||
weight: 0.8
|
||||
- model: ValiantLabs/Qwen3-4B-ShiningValiant3
|
||||
parameters:
|
||||
weight: 0.8
|
||||
- model: prithivMLmods/Crux-Qwen3_OpenThinking-4B
|
||||
parameters:
|
||||
weight: 0.9
|
||||
merge_method: ties
|
||||
base_model: Qwen/Qwen3-4B
|
||||
parameters:
|
||||
normalize: true
|
||||
int8_mask: true
|
||||
dtype: float16
|
||||
151388
merges.txt
Normal file
151388
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00002.safetensors
Normal file
3
model-00001-of-00002.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d434ab0c1c26dbbbec174d772650d18418d25434edde5f001e5c7c9cb7d532de
|
||||
size 4990818520
|
||||
3
model-00002-of-00002.safetensors
Normal file
3
model-00002-of-00002.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:203a9c8fbb555df8017ab7a360e0b33b6cf9879b5131ef8ae12e0edfad3fecbb
|
||||
size 3054163080
|
||||
3
model-F16.gguf
Normal file
3
model-F16.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5e64dbf081282165e7c76881758a4ee2caf1effb2377e43aea6117fd5e846099
|
||||
size 8051287136
|
||||
3
model-Q4_K_M.gguf
Normal file
3
model-Q4_K_M.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f8f5e618d2b45bae49a29dcca4cf848962d5288fbe7117f72169df7d0116bc55
|
||||
size 2497282656
|
||||
3
model-Q8_0.gguf
Normal file
3
model-Q8_0.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d2e131c03037cf6196cda6e74da5b29157aada1fbb913b0ef3e39781c3fd5297
|
||||
size 4280407232
|
||||
1
model.safetensors.index.json
Normal file
1
model.safetensors.index.json
Normal file
File diff suppressed because one or more lines are too long
BIN
tokenizer.json
(Stored with Git LFS)
Normal file
BIN
tokenizer.json
(Stored with Git LFS)
Normal file
Binary file not shown.
239
tokenizer_config.json
Normal file
239
tokenizer_config.json
Normal file
@@ -0,0 +1,239 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151665": {
|
||||
"content": "<tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151666": {
|
||||
"content": "</tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151667": {
|
||||
"content": "<think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151668": {
|
||||
"content": "</think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"model_max_length": 131072,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
Reference in New Issue
Block a user