From a7eb42cf0a138bf22d0492546aa5b810cbe75fcf Mon Sep 17 00:00:00 2001 From: zhangyiming <34808445+menogrey@users.noreply.github.com> Date: Fri, 14 Nov 2025 15:43:22 +0800 Subject: [PATCH] [v0.11.0-dev][Bugfix][cherry-pick]bugfix for weight load of kimi-k2 (#4190) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What this PR does / why we need it? This is a cherry-pick of #3798. It fixes the kimi-k2 startup bug (weight-load error): https://github.com/vllm-project/vllm-ascend/issues/3785 ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/c9461e05a4ed3557cfbf4b15ded1e26761cc39ca --------- Signed-off-by: Levi-JQ Signed-off-by: menogrey <1299267905@qq.com> Co-authored-by: Levi <54832289+Levi-JQ@users.noreply.github.com> Co-authored-by: Levi-JQ Co-authored-by: zhaozx-cn --- .github/workflows/release_whl.yml | 8 +++++++- vllm_ascend/quantization/quant_config.py | 5 +++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release_whl.yml b/.github/workflows/release_whl.yml index bf1f62f..351ea42 100644 --- a/.github/workflows/release_whl.yml +++ b/.github/workflows/release_whl.yml @@ -57,7 +57,13 @@ jobs: - name: Print run: | lscpu - + + - name: Free up disk space + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + with: + tool-cache: true + docker-images: false + - name: Build wheel run: | ls diff --git a/vllm_ascend/quantization/quant_config.py b/vllm_ascend/quantization/quant_config.py index e742852..d31a696 100644 --- a/vllm_ascend/quantization/quant_config.py +++ b/vllm_ascend/quantization/quant_config.py @@ -193,6 +193,11 @@ packed_modules_model_mapping = { ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"], "fused_qkv_a_proj": ["q_a_proj", "kv_a_proj_with_mqa"] }, + "kimi_k2": { + "gate_up_proj": ["gate_proj", "up_proj"], + "experts": +
["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"] + }, "deepseek_v32": { "gate_up_proj": ["gate_proj", "up_proj"], "experts":