[Quant][GLM] Adapt glm quant. (#3147)

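Adapt GLM quantization: add a `glm4_moe` entry (packed `qkv_proj`, `gate_up_proj`, and `experts` modules) to `packed_modules_model_mapping`.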
- vLLM version: v0.10.2
- vLLM main: f225ea7dd9

Signed-off-by: whx-sjtu <2952154980@qq.com>
This commit is contained in:
whx
2025-09-25 11:13:29 +08:00
committed by GitHub
parent a055183821
commit c814b32b90

View File

@@ -210,7 +210,20 @@ packed_modules_model_mapping = {
"gate_proj",
"up_proj",
],
}
},
"glm4_moe": {
"qkv_proj": [
"q_proj",
"k_proj",
"v_proj",
],
"gate_up_proj": [
"gate_proj",
"up_proj",
],
"experts":
["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"]
},
}