[Bugfix] Support Kimi-K2.5 models (#6755)

### What this PR does / why we need it?
This PR adds support for the Kimi-K2.5 models on NPU with both bf16 and
w4a8 weights.
The corresponding PR in the vLLM community has been merged:
https://github.com/vllm-project/vllm/pull/34501

### Does this PR introduce _any_ user-facing change?
- No.

### How was this patch tested?
We tested the Kimi-K2.5 w4a8 weights, available at:
https://modelscope.cn/models/Eco-Tech/Kimi-K2.5-W4A8
They ran successfully on 910B NPUs using vllm-ascend.
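For reference, a minimal offline smoke-test sketch. The checkpoint path and `tensor_parallel_size` are illustrative, and `quantization="ascend"` assumes vllm-ascend's quantization entry point applies to these weights; none of this is taken from the PR itself:

```python
# Minimal smoke test, assuming a local copy of the ModelScope weights
# above; path, quantization name, and parallel size are illustrative.
from vllm import LLM, SamplingParams

llm = LLM(
    model="/path/to/Kimi-K2.5-W4A8",  # local w4a8 checkpoint
    quantization="ascend",            # vllm-ascend quantization backend
    tensor_parallel_size=8,           # depends on the NPU setup
    trust_remote_code=True,
)
outputs = llm.generate(["Hello, world"], SamplingParams(max_tokens=16))
print(outputs[0].outputs[0].text)
```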

- vLLM version: v0.15.0
- vLLM main:
9562912cea

---------

Signed-off-by: LoganJane <LoganJane73@hotmail.com>


@@ -54,6 +54,10 @@ QUANT_MODEL_PREFIX_MAPPINGS: dict[str, dict[str, str]] = {
"language_model.lm_head.": "lm_head.",
"language_model.model.": "model.language_model.",
},
"kimi_k25": {
"mm_projector.linear_1": "mm_projector.proj.0",
"mm_projector.linear_2": "mm_projector.proj.2",
},
}
# key: model_type
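As context for how these mapping tables are consumed, here is a hedged sketch of prefix remapping over checkpoint weight names; `remap_weight_name` is a hypothetical helper for illustration, not code from this PR:

```python
# Hypothetical helper showing how a QUANT_MODEL_PREFIX_MAPPINGS entry
# rewrites checkpoint weight names; not part of this PR.
def remap_weight_name(name: str, prefix_mapping: dict[str, str]) -> str:
    for old, new in prefix_mapping.items():
        if name.startswith(old):
            return new + name[len(old):]
    return name

kimi_k25 = {
    "mm_projector.linear_1": "mm_projector.proj.0",
    "mm_projector.linear_2": "mm_projector.proj.2",
}
# Prints "mm_projector.proj.0.weight"
print(remap_weight_name("mm_projector.linear_1.weight", kimi_k25))
```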
@@ -393,8 +397,9 @@ class AscendModelSlimConfig(QuantizationConfig):
         else:
             from vllm.model_executor.layers.attention import Attention
-            if prefix.startswith("language_model"):
-                prefix = prefix.split(".", 1)[-1]
+            if model_type != "kimi_k2":
+                if prefix.startswith("language_model"):
+                    prefix = prefix.split(".", 1)[-1]
             if isinstance(layer, LinearBase):
                 if self.is_layer_skipped_ascend(prefix, self.packed_modules_mapping):
                     # Delayed import to avoid circular import
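Read in isolation, the changed branch amounts to the following. This standalone function is a restatement for clarity, assuming `model_type` comes from the model's config; it is not the PR's actual code:

```python
# Hypothetical restatement of the changed branch above: most model types
# strip a leading "language_model." segment before skip-layer matching,
# while kimi_k2 keeps the prefix exactly as stored in the checkpoint.
def normalize_prefix(prefix: str, model_type: str) -> str:
    if model_type != "kimi_k2" and prefix.startswith("language_model"):
        # e.g. "language_model.model.layers.0" -> "model.layers.0"
        return prefix.split(".", 1)[-1]
    return prefix
```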