diff --git a/vllm_ascend/models/__init__.py b/vllm_ascend/models/__init__.py
index 42960f1..e7f021f 100644
--- a/vllm_ascend/models/__init__.py
+++ b/vllm_ascend/models/__init__.py
@@ -29,3 +29,7 @@ def register_model():
     ModelRegistry.register_model(
         "DeepseekV3ForCausalLM",
         "vllm_ascend.models.deepseek_v2:CustomDeepseekV3ForCausalLM")
+
+    ModelRegistry.register_model(
+        "Qwen3MoeForCausalLM",
+        "vllm_ascend.models.qwen3_moe:CustomQwen3MoeForCausalLM")
diff --git a/vllm_ascend/models/qwen3_moe.py b/vllm_ascend/models/qwen3_moe.py
new file mode 100644
index 0000000..8ff1b52
--- /dev/null
+++ b/vllm_ascend/models/qwen3_moe.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Adapted from vllm/model_executor/models/qwen3_moe.py
+# This file is a part of the vllm-ascend project.
+
+from vllm.model_executor.models.qwen3_moe import Qwen3MoeForCausalLM
+
+
+class CustomQwen3MoeForCausalLM(Qwen3MoeForCausalLM):  # adds mapping only
+    packed_modules_mapping = {  # packed name -> constituent module names
+        "qkv_proj": [  # q/k/v projections grouped under one name
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": [  # gate and up projections grouped under one name
+            "gate_proj",
+            "up_proj",
+        ],
+        "experts":  # only expert index 0's three projections are listed
+        ["experts.0.gate_proj", "experts.0.up_proj", "experts.0.down_proj"],
+    }  # NOTE(review): consumer not visible here; confirm against vLLM