From eb648f739863c301aeaf72725f9d14f7ed8be518 Mon Sep 17 00:00:00 2001
From: LoganJane <42287016+LoganJane@users.noreply.github.com>
Date: Mon, 9 Mar 2026 16:07:16 +0800
Subject: [PATCH] [Bugfix] Support quant config in glm46v (#7062)

### What this PR does / why we need it?
We need to support quant config in glm46v.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
We used the 'Ascend/msit' quantization method to test the w8a8 weights.
Successfully ran on NPU using vllm-ascend with the w8a8 weights.

- vLLM version: v0.16.0
- vLLM main: https://github.com/vllm-project/vllm/commit/4034c3d32e30d01639459edd3ab486f56993876d

Signed-off-by: g00887675/loganJane
Co-authored-by: g00887675/loganJane
---
 vllm_ascend/quantization/modelslim_config.py | 34 ++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/vllm_ascend/quantization/modelslim_config.py b/vllm_ascend/quantization/modelslim_config.py
index 3678c6f3..3e3b308a 100644
--- a/vllm_ascend/quantization/modelslim_config.py
+++ b/vllm_ascend/quantization/modelslim_config.py
@@ -77,6 +77,16 @@ QUANT_MODEL_PREFIX_MAPPINGS: dict[str, dict[str, str]] = {
     "qwen2_5_omni_text": {
         "language_model.": "thinker.",
     },
+    "glm4v_moe": {
+        "visual.": "model.visual.",
+        "language_model.lm_head.": "lm_head.",
+        "language_model.model.": "model.language_model.",
+    },
+    "glm4v_moe_text": {
+        "visual.": "model.visual.",
+        "language_model.lm_head.": "lm_head.",
+        "language_model.model.": "model.language_model.",
+    },
 }
 
 # key: model_type
@@ -186,6 +196,30 @@ packed_modules_model_mapping: dict[str, dict[str, list[str]]] = {
         "experts": ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"],
         "fused_qkv_a_proj": ["q_a_proj", "kv_a_proj_with_mqa"],
     },
+    "glm4v_moe": {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+        "experts": ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"],
+    },
+    "glm4v_moe_text": {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+        "experts": ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"],
+    },
     "longcat_flash": {
         "gate_up_proj": ["gate_proj", "up_proj"],
         "experts": ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"],