初始化项目,由ModelHub XC社区提供模型

Model: floria-n/GLM-4.7-Flash-GGUF-Config
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-04-12 06:10:57 +08:00
commit 90d2ce07fc
37 changed files with 801 additions and 0 deletions

66
.gitattributes vendored Normal file
View File

@@ -0,0 +1,66 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
BF16/GLM-4.7-Flash-BF16-00001-of-00002.gguf filter=lfs diff=lfs merge=lfs -text
BF16/GLM-4.7-Flash-BF16-00002-of-00002.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-UD-IQ1_S.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-UD-Q2_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-UD-Q3_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-UD-Q4_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-UD-IQ1_M.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-UD-IQ2_M.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-UD-IQ3_XXS.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-UD-Q5_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-UD-IQ2_XXS.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-UD-TQ1_0.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-Q2_K_L.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-IQ4_NL.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-Q4_1.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-UD-Q6_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-UD-Q8_K_XL.gguf filter=lfs diff=lfs merge=lfs -text
imatrix_unsloth.gguf_file filter=lfs diff=lfs merge=lfs -text
GLM-4.7-Flash-MXFP4_MOE.gguf filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:67ff61e44750b0caf0f674bb22e8083da0dafd4a8b98e8a5212c398266ba3167
size 49912579232

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fc3b83f60bd00b9b7b045aa0fc6a9f551f65c1e855a0ad8d17fab18391d1b45f
size 9996258464

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:da2e17e8227fa048092798b66a9ae5f4e38155811af5d8b2cbcc6104d42c5e6b
size 17165033824

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:44ad6ab1885f3fd2282b1b684df729868ef2da65fd8adeab4eb848b25ac3925b
size 16271360352

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ecf4f3fd9448e45d01e1cacb54d9feaf308253d77e97893fac22b78bd9d04c4e
size 16968499296

3
GLM-4.7-Flash-Q2_K.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:96b400bda76c8297a12e6aa4ff894f260baf3e8dd814c8a49cce0d90bf114996
size 11344720224

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fffb4a51cdd37902106102078e12f949a9e5a985243df4a4e7ef333f3bff57ad
size 11419062624

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d11329183bbb320f11d025b66c4d546e4b58010381a580708ef2fe7415a92307
size 14614624608

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:21d16834f428215926466ce44862fccbd3337008b49ea4ac2f58abdd8ddd1e91
size 13288618336

3
GLM-4.7-Flash-Q4_0.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d0bbdfcde6e323ebf90a8b9e95da57100e972be1ec6f0bfa0fad0feaa426557e
size 17216676192

3
GLM-4.7-Flash-Q4_1.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:417217f748759db0787a248859555b48d7b8b0538fa575b98cb64647586d1df1
size 18972205408

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:29837ed2c0fc5f51981adf8ac8083fcf80743c598381f13e9f06cbad0498b174
size 18312339808

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4329be1a90062c8be84790322bfa3d0989cb4396f37f1a2e159788fa5e71c6be
size 17268318560

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:167848c4b460887d7a52c44c378d3996a0d38b3302ac2045494dc8b50869a893
size 21408850272

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6c50f2e5251d1c77a0305c6dafdde93fceb8ab15bc9cd263cc4b707e0f71ff46
size 20817813856

3
GLM-4.7-Flash-Q6_K.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f5bdc431802d7a3dab3be5a50ad0848a5569fd45c98c5e2ad83412c87cd887dc
size 24693098848

3
GLM-4.7-Flash-Q8_0.gguf Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8c0922b03326932a40315ce92bbee01cc836ad18664268019143c898bdd7dd0b
size 31842799968

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bbfe183ffd1bdb1d2f22878c9179d6336194ef35aa17d35045677c6f40de2cb4
size 9808507232

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:234e0149f81b181bf52ca3998386604965bb6062985e8e8ba7e77785cb6aade6
size 9247983968

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c07e2be576052a3acda8c43683a1816c45639d05e82302fab1b0ad5f71c0db62
size 10991776096

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d927162886f337ef4e1ddbeb6de1aac2872981629291d3b2444da9e6a1205166
size 10513625440

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:36b022b9cc24dd09bac703def9b065267e7ea3e51367d927958baa21484d745c
size 12907368800

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:08a432581d3a797af07a021455ada33499185213ad250e85fb26daf3fe34c421
size 11888759136

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:582728c0f6931a1cf734d5a7cf60ec73522fb79811e004cbc12476dd69b033bf
size 13782004064

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b0d4fbc1211f891b4cfbf2a497160bfe06a49412420068904d426b7a13f4ba7f
size 17520169312

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1e02f8b6cb4ae5d14e05aa3bf57361db0a69c79cc2f44e12f5a0e4d10fb93cd5
size 21693931872

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b5ddf6a5531a4d06ce84a652419c8d32c0cb15ca64549d715f783de948a3f3e2
size 26175238496

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f07a3b8fce22b5774610ba60f7db9176ea9538ea9f6be357c4a1fced0c7c0882
size 35624923488

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a68a7af6b8b7620eb3367668be8c4b2dc3988aeed409d864ce2781bc4f5844c8
size 8331004256

179
README.md Normal file
View File

@@ -0,0 +1,179 @@
---
tags:
- unsloth
base_model:
- zai-org/GLM-4.7-Flash
language:
- en
- zh
library_name: transformers
license: mit
pipeline_tag: text-generation
---
# Read our [How to Run GLM-4.7-Flash Guide!](https://unsloth.ai/docs/models/glm-4.7-flash)
## Jan 21 update: llama.cpp fixed a bug that caused looping and poor outputs. We updated the GGUFs - please re-download the model for much better outputs.
- **Repeat penalty: Disable it, or set `--repeat-penalty 1.0`**
You can now use Z.ai's recommended parameters and get great results:
- For general use-case: `--temp 1.0 --top-p 0.95`
- For tool-calling: `--temp 0.7 --top-p 1.0`
- If using llama.cpp, set `--min-p 0.01` as llama.cpp's default is 0.05
You can also fine-tune GLM-4.7-Flash with Unsloth via our [GLM free notebook](https://unsloth.ai/docs/models/glm-4.7-flash#fine-tuning-glm-4.7-flash).
---
<div>
<p style="margin-top: 0;margin-bottom: 0;">
<em><a href="https://docs.unsloth.ai/basics/unsloth-dynamic-v2.0-gguf">Unsloth Dynamic 2.0</a> achieves superior accuracy & outperforms other leading quants.</em>
</p>
<div style="display: flex; gap: 5px; align-items: center; ">
<a href="https://github.com/unslothai/unsloth/">
<img src="https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png" width="133">
</a>
<a href="https://discord.gg/unsloth">
<img src="https://github.com/unslothai/unsloth/raw/main/images/Discord%20button.png" width="173">
</a>
<a href="https://unsloth.ai/docs/models/glm-4.7-flash">
<img src="https://raw.githubusercontent.com/unslothai/unsloth/refs/heads/main/images/documentation%20green%20button.png" width="143">
</a>
</div>
</div>
---
# GLM-4.7-Flash
<div align="center">
<img src=https://raw.githubusercontent.com/zai-org/GLM-4.5/refs/heads/main/resources/logo.svg width="15%"/>
</div>
<p align="center">
👋 Join our <a href="https://discord.gg/QR7SARHRxK" target="_blank">Discord</a> community.
<br>
📖 Check out the GLM-4.7 <a href="https://z.ai/blog/glm-4.7" target="_blank">technical blog</a> and the <a href="https://arxiv.org/abs/2508.06471" target="_blank">technical report (GLM-4.5)</a>.
<br>
📍 Use GLM-4.7-Flash API services on the <a href="https://docs.z.ai/guides/llm/glm-4.7">Z.ai API Platform</a>.
<br>
👉 One click to <a href="https://chat.z.ai">GLM-4.7</a>.
</p>
## Introduction
GLM-4.7-Flash is a 30B-A3B MoE model. As the strongest model in the 30B class, GLM-4.7-Flash offers a new option for lightweight deployment that balances performance and efficiency.
### Performances on Benchmarks
| Benchmark | GLM-4.7-Flash | Qwen3-30B-A3B-Thinking-2507 | GPT-OSS-20B |
|--------------------|---------------|-----------------------------|-------------|
| AIME 25 | 91.6 | 85.0 | 91.7 |
| GPQA | 75.2 | 73.4 | 71.5 |
| LCB v6 | 64.0 | 66.0 | 61.0 |
| HLE | 14.4 | 9.8 | 10.9 |
| SWE-bench Verified | 59.2 | 22.0 | 34.0 |
| τ²-Bench | 79.5 | 49.0 | 47.7 |
| BrowseComp | 42.8 | 2.29 | 28.3 |
## Serve GLM-4.7-Flash Locally
For local deployment, GLM-4.7-Flash supports inference frameworks including vLLM and SGLang. Comprehensive deployment
instructions are available in the official [Github](https://github.com/zai-org/GLM-4.5) repository.
vLLM and SGLang only support GLM-4.7-Flash on their main branches.
### vLLM
+ Using pip (you must use pypi.org as the index URL):
```shell
pip install -U vllm --pre --index-url https://pypi.org/simple --extra-index-url https://wheels.vllm.ai/nightly
pip install git+https://github.com/huggingface/transformers.git
```
### SGLang
+ Install SGLang from source via pip, then update transformers to the latest main branch.
### transformers
To use with transformers, first install the latest version:
```shell
pip install git+https://github.com/huggingface/transformers.git
```
and then run:
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
MODEL_PATH = "zai-org/GLM-4.7-Flash"
messages = [{"role": "user", "content": "hello"}]
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
inputs = tokenizer.apply_chat_template(
messages,
tokenize=True,
add_generation_prompt=True,
return_dict=True,
return_tensors="pt",
)
model = AutoModelForCausalLM.from_pretrained(
pretrained_model_name_or_path=MODEL_PATH,
torch_dtype=torch.bfloat16,
device_map="auto",
)
inputs = inputs.to(model.device)
generated_ids = model.generate(**inputs, max_new_tokens=128, do_sample=False)
output_text = tokenizer.decode(generated_ids[0][inputs.input_ids.shape[1]:])
print(output_text)
```
### vLLM
```shell
vllm serve zai-org/GLM-4.7-Flash \
--tensor-parallel-size 4 \
--speculative-config.method mtp \
--speculative-config.num_speculative_tokens 1 \
--tool-call-parser glm47 \
--reasoning-parser glm45 \
--enable-auto-tool-choice \
--served-model-name glm-4.7-flash
```
### SGLang
```shell
python3 -m sglang.launch_server \
--model-path zai-org/GLM-4.7-Flash \
--tp-size 4 \
--tool-call-parser glm47 \
--reasoning-parser glm45 \
--speculative-algorithm EAGLE \
--speculative-num-steps 3 \
--speculative-eagle-topk 1 \
--speculative-num-draft-tokens 4 \
--mem-fraction-static 0.8 \
--served-model-name glm-4.7-flash \
--host 0.0.0.0 \
--port 8000
```
## Citation
If you find our work useful in your research, please consider citing the following paper:
```bibtex
@misc{5team2025glm45agenticreasoningcoding,
title={GLM-4.5: Agentic, Reasoning, and Coding (ARC) Foundation Models},
author={GLM Team and Aohan Zeng and Xin Lv and Qinkai Zheng and Zhenyu Hou and Bin Chen and Chengxing Xie and Cunxiang Wang and Da Yin and Hao Zeng and Jiajie Zhang and Kedong Wang and Lucen Zhong and Mingdao Liu and Rui Lu and Shulin Cao and Xiaohan Zhang and Xuancheng Huang and Yao Wei and Yean Cheng and Yifan An and Yilin Niu and Yuanhao Wen and Yushi Bai and Zhengxiao Du and Zihan Wang and Zilin Zhu and Bohan Zhang and Bosi Wen and Bowen Wu and Bowen Xu and Can Huang and Casey Zhao and Changpeng Cai and Chao Yu and Chen Li and Chendi Ge and Chenghua Huang and Chenhui Zhang and Chenxi Xu and Chenzheng Zhu and Chuang Li and Congfeng Yin and Daoyan Lin and Dayong Yang and Dazhi Jiang and Ding Ai and Erle Zhu and Fei Wang and Gengzheng Pan and Guo Wang and Hailong Sun and Haitao Li and Haiyang Li and Haiyi Hu and Hanyu Zhang and Hao Peng and Hao Tai and Haoke Zhang and Haoran Wang and Haoyu Yang and He Liu and He Zhao and Hongwei Liu and Hongxi Yan and Huan Liu and Huilong Chen and Ji Li and Jiajing Zhao and Jiamin Ren and Jian Jiao and Jiani Zhao and Jianyang Yan and Jiaqi Wang and Jiayi Gui and Jiayue Zhao and Jie Liu and Jijie Li and Jing Li and Jing Lu and Jingsen Wang and Jingwei Yuan and Jingxuan Li and Jingzhao Du and Jinhua Du and Jinxin Liu and Junkai Zhi and Junli Gao and Ke Wang and Lekang Yang and Liang Xu and Lin Fan and Lindong Wu and Lintao Ding and Lu Wang and Man Zhang and Minghao Li and Minghuan Xu and Mingming Zhao and Mingshu Zhai and Pengfan Du and Qian Dong and Shangde Lei and Shangqing Tu and Shangtong Yang and Shaoyou Lu and Shijie Li and Shuang Li and Shuang-Li and Shuxun Yang and Sibo Yi and Tianshu Yu and Wei Tian and Weihan Wang and Wenbo Yu and Weng Lam Tam and Wenjie Liang and Wentao Liu and Xiao Wang and Xiaohan Jia and Xiaotao Gu and Xiaoying Ling and Xin Wang and Xing Fan and Xingru Pan and Xinyuan Zhang and Xinze Zhang and Xiuqing Fu and Xunkai Zhang and Yabo Xu and Yandong Wu and Yida Lu and Yidong Wang and Yilin Zhou and Yiming 
Pan and Ying Zhang and Yingli Wang and Yingru Li and Yinpei Su and Yipeng Geng and Yitong Zhu and Yongkun Yang and Yuhang Li and Yuhao Wu and Yujiang Li and Yunan Liu and Yunqing Wang and Yuntao Li and Yuxuan Zhang and Zezhen Liu and Zhen Yang and Zhengda Zhou and Zhongpei Qiao and Zhuoer Feng and Zhuorui Liu and Zichen Zhang and Zihan Wang and Zijun Yao and Zikang Wang and Ziqiang Liu and Ziwei Chai and Zixuan Li and Zuodong Zhao and Wenguang Chen and Jidong Zhai and Bin Xu and Minlie Huang and Hongning Wang and Juanzi Li and Yuxiao Dong and Jie Tang},
year={2025},
eprint={2508.06471},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2508.06471},
}
```

86
chat_template.jinja Normal file
View File

@@ -0,0 +1,86 @@
[gMASK]<sop>
{%- if tools -%}
<|system|>
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{% for tool in tools %}
{{ tool | tojson(ensure_ascii=False) }}
{% endfor %}
</tools>
For each function call, output the function name and arguments within the following XML format:
<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call>{%- endif -%}
{%- macro visible_text(content) -%}
{%- if content is string -%}
{{- content }}
{%- elif content is iterable and content is not mapping -%}
{%- for item in content -%}
{%- if item is mapping and item.type == 'text' -%}
{{- item.text }}
{%- elif item is string -%}
{{- item }}
{%- endif -%}
{%- endfor -%}
{%- else -%}
{{- content }}
{%- endif -%}
{%- endmacro -%}
{%- set ns = namespace(last_user_index=-1) %}
{%- for m in messages %}
{%- if m.role == 'user' %}
{% set ns.last_user_index = loop.index0 -%}
{%- endif %}
{%- endfor %}
{% for m in messages %}
{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}
{%- elif m.role == 'assistant' -%}
<|assistant|>
{%- set reasoning_content = '' %}
{%- set content = visible_text(m.content) %}
{%- if m.reasoning_content is string %}
{%- set reasoning_content = m.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%}
{{ '<think>' + reasoning_content.strip() + '</think>'}}
{%- else -%}
{{ '</think>' }}
{%- endif -%}
{%- if content.strip() -%}
{{ content.strip() }}
{%- endif -%}
{% if m.tool_calls %}
{% for tc in m.tool_calls %}
{%- if tc.function %}
{%- set tc = tc.function %}
{%- endif %}
{{- '<tool_call>' + tc.name -}}
{% set _args = tc.arguments %}{% for k, v in _args.items() %}<arg_key>{{ k }}</arg_key><arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>{% endfor %}</tool_call>{% endfor %}
{% endif %}
{%- elif m.role == 'tool' -%}
{%- if m.content is string -%}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|observation|>' }}
{%- endif %}
{{- '<tool_response>' }}
{{- m.content }}
{{- '</tool_response>' }}
{%- else -%}
<|observation|>{% for tr in m.content %}
<tool_response>{{ tr.output if tr.output is defined else tr }}</tool_response>{% endfor -%}
{% endif -%}
{%- elif m.role == 'system' -%}
<|system|>{{ visible_text(m.content) }}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
<|assistant|>{{- '</think>' if (enable_thinking is defined and not enable_thinking) else '<think>' -}}
{%- endif -%}

45
config.json Normal file
View File

@@ -0,0 +1,45 @@
{
"architectures": [
"Glm4MoeLiteForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"pad_token_id": 154820,
"eos_token_id": [
154820,
154827,
154829
],
"hidden_act": "silu",
"hidden_size": 2048,
"intermediate_size": 10240,
"max_position_embeddings": 202752,
"model_type": "glm4_moe_lite",
"moe_intermediate_size": 1536,
"topk_method": "noaux_tc",
"norm_topk_prob": true,
"num_attention_heads": 20,
"n_group": 1,
"topk_group": 1,
"n_routed_experts": 64,
"n_shared_experts": 1,
"routed_scaling_factor": 1.8,
"num_experts_per_tok": 4,
"first_k_dense_replace": 1,
"num_hidden_layers": 47,
"num_key_value_heads": 20,
"num_nextn_predict_layers": 1,
"partial_rotary_factor": 1.0,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 1000000,
"tie_word_embeddings": false,
"dtype": "bfloat16",
"transformers_version": "5.0.0rc0",
"q_lora_rank": 768,
"kv_lora_rank": 512,
"qk_nope_head_dim": 192,
"qk_rope_head_dim": 64,
"v_head_dim": 256,
"vocab_size": 154880
}

11
generation_config.json Normal file
View File

@@ -0,0 +1,11 @@
{
"_from_model_config": true,
"eos_token_id": [
154820,
154827,
154829
],
"pad_token_id": 154820,
"temperature": 1.0,
"transformers_version": "5.0.0.dev0"
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:585336847c79c2fc9ba778169a775634b3acaeb4f107546d3f47f2022f916354
size 72447904

3
tokenizer.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:19e773648cb4e65de8660ea6365e10acca112d42a854923df93db4a6f333a82d
size 20217442

321
tokenizer_config.json Normal file
View File

@@ -0,0 +1,321 @@
{
"added_tokens_decoder": {
"154820": {
"content": "<|endoftext|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154821": {
"content": "[MASK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154822": {
"content": "[gMASK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154823": {
"content": "[sMASK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154824": {
"content": "<sop>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154825": {
"content": "<eop>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154826": {
"content": "<|system|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154827": {
"content": "<|user|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154828": {
"content": "<|assistant|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154829": {
"content": "<|observation|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154830": {
"content": "<|begin_of_image|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154831": {
"content": "<|end_of_image|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154832": {
"content": "<|begin_of_video|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154833": {
"content": "<|end_of_video|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154834": {
"content": "<|begin_of_audio|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154835": {
"content": "<|end_of_audio|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154836": {
"content": "<|begin_of_transcription|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154837": {
"content": "<|end_of_transcription|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
"154838": {
"content": "<|code_prefix|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154839": {
"content": "<|code_middle|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154840": {
"content": "<|code_suffix|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154841": {
"content": "<think>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154842": {
"content": "</think>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154843": {
"content": "<tool_call>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154844": {
"content": "</tool_call>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154845": {
"content": "<tool_response>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154846": {
"content": "</tool_response>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154847": {
"content": "<arg_key>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154848": {
"content": "</arg_key>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154849": {
"content": "<arg_value>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154850": {
"content": "</arg_value>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154851": {
"content": "/nothink",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154852": {
"content": "<|begin_of_box|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154853": {
"content": "<|end_of_box|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154854": {
"content": "<|image|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
"154855": {
"content": "<|video|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
}
},
"additional_special_tokens": [
"<|endoftext|>",
"[MASK]",
"[gMASK]",
"[sMASK]",
"<sop>",
"<eop>",
"<|system|>",
"<|user|>",
"<|assistant|>",
"<|observation|>",
"<|begin_of_image|>",
"<|end_of_image|>",
"<|begin_of_video|>",
"<|end_of_video|>",
"<|begin_of_audio|>",
"<|end_of_audio|>",
"<|begin_of_transcription|>",
"<|end_of_transcription|>"
],
"clean_up_tokenization_spaces": false,
"do_lower_case": false,
"eos_token": "<|endoftext|>",
"extra_special_tokens": {},
"model_max_length": 128000,
"pad_token": "<|endoftext|>",
"padding_side": "left",
"remove_space": false,
"tokenizer_class": "PreTrainedTokenizer"
}