From 17f05b10893bd18558b3c69f7af880fffc6c1653 Mon Sep 17 00:00:00 2001 From: yiz-liu <136800916+yiz-liu@users.noreply.github.com> Date: Fri, 23 May 2025 15:50:48 +0800 Subject: [PATCH] [Feature] Add CustomQwen3MoeForCausalLM model (#925) Tweak packed_modules_mapping to support W8A8 weights. ### What this PR does / why we need it? ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? Signed-off-by: Yizhou Liu --- vllm_ascend/models/__init__.py | 4 ++++ vllm_ascend/models/qwen3_moe.py | 35 +++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 vllm_ascend/models/qwen3_moe.py diff --git a/vllm_ascend/models/__init__.py b/vllm_ascend/models/__init__.py index 42960f1..e7f021f 100644 --- a/vllm_ascend/models/__init__.py +++ b/vllm_ascend/models/__init__.py @@ -29,3 +29,7 @@ def register_model(): ModelRegistry.register_model( "DeepseekV3ForCausalLM", "vllm_ascend.models.deepseek_v2:CustomDeepseekV3ForCausalLM") + + ModelRegistry.register_model( + "Qwen3MoeForCausalLM", + "vllm_ascend.models.qwen3_moe:CustomQwen3MoeForCausalLM") diff --git a/vllm_ascend/models/qwen3_moe.py b/vllm_ascend/models/qwen3_moe.py new file mode 100644 index 0000000..8ff1b52 --- /dev/null +++ b/vllm_ascend/models/qwen3_moe.py @@ -0,0 +1,35 @@ +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# Copyright 2023 The vLLM team. +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# Adapted from vllm/model_executor/models/qwen3_moe.py
+# This file is a part of the vllm-ascend project.
+
+from vllm.model_executor.models.qwen3_moe import Qwen3MoeForCausalLM
+
+
+class CustomQwen3MoeForCausalLM(Qwen3MoeForCausalLM):  # thin subclass: inherits all model behavior, overrides only the weight-packing map (per commit message, to support W8A8 weights)
+    packed_modules_mapping = {  # fused module name -> names of the per-checkpoint sub-weights that get packed into it at load time
+        "qkv_proj": [  # fused attention projection built from the three separate Q/K/V weights
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": [  # fused MLP projection built from separate gate and up weights
+            "gate_proj",
+            "up_proj",
+        ],
+        "experts":  # MoE expert weights; only expert index 0 is listed — presumably the loader expands this template across all experts, verify against vLLM's weight-loading code
+        ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"],
+    }