From d64bdd06ae656048040c34c0ff3b909293b5113b Mon Sep 17 00:00:00 2001 From: Levi <54832289+Levi-JQ@users.noreply.github.com> Date: Mon, 27 Oct 2025 21:18:35 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90Bugfix=E3=80=91bugfix=20for=20weight?= =?UTF-8?q?=20load=20of=20=20kimi-k2=20(#3798)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Levi-JQ ### What this PR does / why we need it? Fix the kimi-k2 startup failure caused by a weight-loading error: https://github.com/vllm-project/vllm-ascend/issues/3785 ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/c9461e05a4ed3557cfbf4b15ded1e26761cc39ca Signed-off-by: Levi-JQ Co-authored-by: Levi-JQ Co-authored-by: zhaozx-cn --- vllm_ascend/quantization/quant_config.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vllm_ascend/quantization/quant_config.py b/vllm_ascend/quantization/quant_config.py index 36dbcb17..5960d2f8 100644 --- a/vllm_ascend/quantization/quant_config.py +++ b/vllm_ascend/quantization/quant_config.py @@ -193,6 +193,11 @@ packed_modules_model_mapping = { ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"], "fused_qkv_a_proj": ["q_a_proj", "kv_a_proj_with_mqa"] }, + "kimi_k2": { + "gate_up_proj": ["gate_proj", "up_proj"], + "experts": + ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"] + }, "deepseek_v32": { "gate_up_proj": ["gate_proj", "up_proj"], "experts":