[Quant][GLM] Adapt glm quant. (#3147)
Adapt GLM quantization: add a `glm4_moe` entry to `packed_modules_model_mapping`.
- vLLM version: v0.10.2
- vLLM main: f225ea7dd9
Signed-off-by: whx-sjtu <2952154980@qq.com>
This commit is contained in:
@@ -210,7 +210,20 @@ packed_modules_model_mapping = {
|
||||
"gate_proj",
|
||||
"up_proj",
|
||||
],
|
||||
}
|
||||
},
|
||||
"glm4_moe": {
|
||||
"qkv_proj": [
|
||||
"q_proj",
|
||||
"k_proj",
|
||||
"v_proj",
|
||||
],
|
||||
"gate_up_proj": [
|
||||
"gate_proj",
|
||||
"up_proj",
|
||||
],
|
||||
"experts":
|
||||
["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"]
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user