Files
xc-llm-ascend/vllm_ascend/models/__init__.py
weichen 94dd832815 [MoE] [Refactor] Combine common_fused_moe and fused_moe (#3176)
### What this PR does / why we need it?
1. Move additional functionalities from fused_moe.py to
common_fused_moe.py and remove fused_moe.py
2. Remove unnecessary custom classes from qwen3_moe.py, and it will be
completely removed after we release vllm-ascend v0.11.0

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?

Qwen3-30B-A3B/Qwen3-30B-A3B-W8A8/DeepSeek-V3-W4A8-Pruing/deepseek-mtp/pangu-pro-moe-pruing:

1. Enable/Disable EP
3. Aclgraph & eager
4. SP


- vLLM version: v0.11.0

---------

Signed-off-by: Pr0Wh1teGivee <calvin_zhu0210@outlook.com>
Co-authored-by: weijinqian0 <12153182+weijinqian0@users.noreply.github.com>
2025-10-09 14:12:46 +08:00

57 lines
2.1 KiB
Python

from vllm import ModelRegistry
import vllm_ascend.envs as envs_ascend
def register_model():
ModelRegistry.register_model(
"Qwen2VLForConditionalGeneration",
"vllm_ascend.models.qwen2_vl:AscendQwen2VLForConditionalGeneration")
ModelRegistry.register_model(
"Qwen3VLMoeForConditionalGeneration",
"vllm_ascend.models.qwen2_5_vl_without_padding:AscendQwen3VLMoeForConditionalGeneration"
)
ModelRegistry.register_model(
"Qwen3VLForConditionalGeneration",
"vllm_ascend.models.qwen2_5_vl_without_padding:AscendQwen3VLForConditionalGeneration"
)
if envs_ascend.USE_OPTIMIZED_MODEL:
ModelRegistry.register_model(
"Qwen2_5_VLForConditionalGeneration",
"vllm_ascend.models.qwen2_5_vl:AscendQwen2_5_VLForConditionalGeneration"
)
else:
ModelRegistry.register_model(
"Qwen2_5_VLForConditionalGeneration",
"vllm_ascend.models.qwen2_5_vl_without_padding:AscendQwen2_5_VLForConditionalGeneration_Without_Padding"
)
ModelRegistry.register_model(
"DeepseekV2ForCausalLM",
"vllm_ascend.models.deepseek_v2:CustomDeepseekV2ForCausalLM")
ModelRegistry.register_model(
"DeepseekV3ForCausalLM",
"vllm_ascend.models.deepseek_v2:CustomDeepseekV3ForCausalLM")
ModelRegistry.register_model(
"DeepseekV32ForCausalLM",
"vllm_ascend.models.deepseek_v2:CustomDeepseekV3ForCausalLM")
ModelRegistry.register_model(
"DeepSeekMTPModel",
"vllm_ascend.models.deepseek_mtp:CustomDeepSeekMTP")
# There is no PanguProMoEForCausalLM in vLLM, so we should register it before vLLM config initialization
# to make sure the model can be loaded correctly. This register step can be removed once vLLM support PanguProMoEForCausalLM.
ModelRegistry.register_model(
"PanguProMoEForCausalLM",
"vllm_ascend.torchair.models.torchair_pangu_moe:PanguProMoEForCausalLM"
)
ModelRegistry.register_model(
"Qwen3NextForCausalLM",
"vllm_ascend.models.qwen3_next:CustomQwen3NextForCausalLM")