diff --git a/vllm_ascend/ascend_forward_context.py b/vllm_ascend/ascend_forward_context.py index 70780f74..49cd3b0f 100644 --- a/vllm_ascend/ascend_forward_context.py +++ b/vllm_ascend/ascend_forward_context.py @@ -5,8 +5,7 @@ from typing import TYPE_CHECKING, Any, Optional import torch from vllm.config import CUDAGraphMode, VllmConfig -from vllm.distributed import (get_dp_group, get_ep_group, - get_tensor_model_parallel_world_size) +from vllm.distributed import get_dp_group, get_tensor_model_parallel_world_size from vllm.forward_context import (BatchDescriptor, get_forward_context, set_forward_context) @@ -29,25 +28,6 @@ class FusedMoEState(Enum): All2AllSeq = 5 -def get_fused_moe_state(ep_size: int, with_prefill: bool, - is_deepseek_v3_r1: bool): - # the fusion operator torch_npu.npu_grouped_matmul_finalize_routing called by allgather ep - # only supports deepseek v3/r1 - if (envs_ascend.VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP and ep_size > 1 - and is_deepseek_v3_r1): - return FusedMoEState.AllGatherEP - elif ep_size == 1: - if with_prefill: - return FusedMoEState.NaiveMulticast - else: - return FusedMoEState.AllGather - # NOTE: mc2 need ep_size >= 16 & all2all can't use in torchair graph. - elif ep_size < 16 or with_prefill: - return FusedMoEState.All2All - else: - return FusedMoEState.MC2 - - class MoECommType(Enum): ALLGATHER = 0 MC2 = 1 @@ -95,16 +75,7 @@ def set_ascend_forward_context( forward_context.with_prefill = with_prefill tp_world_size = get_tensor_model_parallel_world_size() - ep_size = (get_ep_group().world_size if - vllm_config.parallel_config.enable_expert_parallel else 1) - # fused_moe_state is used in torchair, it will be deleted along with torchair - is_deepseek_v3_r1 = hasattr( - vllm_config.model_config.hf_config, 'n_routed_experts' - ) and vllm_config.model_config.hf_config.n_routed_experts == 256 - fused_moe_state = get_fused_moe_state(ep_size, with_prefill, - is_deepseek_v3_r1) - forward_context.fused_moe_state = fused_moe_state forward_context.in_profile_run = in_profile_run # NOTE: This cannot be set using set_forward_context