[Bugfix] Support Kimi-K2.5 models (#6755)
### What this PR does / why we need it?
This PR supports the Kimi-K2.5 models on the NPU with bf16 and w4a8
weights.
The corresponding PR in the vllm community has been merged:
https://github.com/vllm-project/vllm/pull/34501
### Does this PR introduce _any_ user-facing change?
- No.
### How was this patch tested?
We tested the Kimi-K2.5 weights. The weights path:
https://modelscope.cn/models/Eco-Tech/Kimi-K2.5-W4A8
Successfully ran on a 910B NPU using vllm-ascend with the w4a8 weights.
- vLLM version: v0.15.0
- vLLM main:
9562912cea
---------
Signed-off-by: LoganJane <LoganJane73@hotmail.com>
This commit is contained in:
@@ -54,6 +54,10 @@ QUANT_MODEL_PREFIX_MAPPINGS: dict[str, dict[str, str]] = {
|
||||
"language_model.lm_head.": "lm_head.",
|
||||
"language_model.model.": "model.language_model.",
|
||||
},
|
||||
"kimi_k25": {
|
||||
"mm_projector.linear_1": "mm_projector.proj.0",
|
||||
"mm_projector.linear_2": "mm_projector.proj.2",
|
||||
},
|
||||
}
|
||||
|
||||
# key: model_type
|
||||
@@ -393,8 +397,9 @@ class AscendModelSlimConfig(QuantizationConfig):
|
||||
else:
|
||||
from vllm.model_executor.layers.attention import Attention
|
||||
|
||||
if prefix.startswith("language_model"):
|
||||
prefix = prefix.split(".", 1)[-1]
|
||||
if model_type != "kimi_k2":
|
||||
if prefix.startswith("language_model"):
|
||||
prefix = prefix.split(".", 1)[-1]
|
||||
if isinstance(layer, LinearBase):
|
||||
if self.is_layer_skipped_ascend(prefix, self.packed_modules_mapping):
|
||||
# Delayed import to avoid circular import
|
||||
|
||||
Reference in New Issue
Block a user