From c814b32b90591f36cb2ffc9b513c6ffe3f45b0d7 Mon Sep 17 00:00:00 2001 From: whx <56632993+whx-sjtu@users.noreply.github.com> Date: Thu, 25 Sep 2025 11:13:29 +0800 Subject: [PATCH] [Quant][GLM] Adapt glm quant. (#3147) Adapt GLM quantization by adding a `glm4_moe` entry to `packed_modules_model_mapping`, with packed-module mappings for `qkv_proj`, `gate_up_proj` and `experts`. - vLLM version: v0.10.2 - vLLM main: https://github.com/vllm-project/vllm/commit/f225ea7dd98e9f29752e5c032cd4a8ee1d712f16 Signed-off-by: whx-sjtu <2952154980@qq.com> --- vllm_ascend/quantization/quant_config.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/quantization/quant_config.py b/vllm_ascend/quantization/quant_config.py index 8fe7767..1a5e74d 100644 --- a/vllm_ascend/quantization/quant_config.py +++ b/vllm_ascend/quantization/quant_config.py @@ -210,7 +210,20 @@ packed_modules_model_mapping = { "gate_proj", "up_proj", ], - } + }, + "glm4_moe": { + "qkv_proj": [ + "q_proj", + "k_proj", + "v_proj", + ], + "gate_up_proj": [ + "gate_proj", + "up_proj", + ], + "experts": + ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"] + }, }