add gatherep select. (#2740)
### What this PR does / why we need it?
Adds gather-EP dispatcher selection: when `VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP` is enabled, `TokenDispatcherWithAllGather` is selected even under expert parallelism (`ep_size > 1`), instead of falling through to the All2AllV path.
- vLLM version: v0.10.1.1
- vLLM main: e599e2c65e
Signed-off-by: wangxiaoxin-sherie <wangxiaoxin7@huawei.com>
Co-authored-by: wangxiaoxin-sherie <wangxiaoxin7@huawei.com>
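The new path is gated on an environment flag. A minimal usage sketch, assuming `vllm_ascend.envs` reads `VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP` from the process environment (the engine-creation lines are illustrative, not part of this PR):

```python
import os

# Assumption: vllm_ascend.envs reads this variable from the process
# environment, so it must be set before vLLM / vllm-ascend are imported.
# With the flag on, TokenDispatcherWithAllGather is selected even when
# ep_size > 1 (see the second hunk below).
os.environ["VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP"] = "1"

# Illustrative engine creation (any MoE model / parallel config would do):
# from vllm import LLM
# llm = LLM(model="some-moe-model", tensor_parallel_size=8)
```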
```diff
@@ -28,6 +28,7 @@ import torch
 import torch_npu
 from vllm.distributed.parallel_state import get_ep_group
 
+import vllm_ascend.envs as envs_ascend
 from vllm_ascend.distributed.parallel_state import get_mc2_group
 from vllm_ascend.distributed.tensor_parallel import \
     gather_from_sequence_parallel_region
```
```diff
@@ -50,6 +51,9 @@ def setup_token_dispatchers(ep_size: int, **kwargs):
 
     if ep_size == 1 and "TokenDispatcherWithAllGather" not in existing_dispatchers:
         _register_token_dispatcher(TokenDispatcherWithAllGather(**kwargs))
+    elif envs_ascend.VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP and ep_size > 1 \
+            and "TokenDispatcherWithAllGather" not in existing_dispatchers:
+        _register_token_dispatcher(TokenDispatcherWithAllGather(**kwargs))
     elif ep_size < 16 and "TokenDispatcherWithAll2AllV" not in existing_dispatchers:
         _register_token_dispatcher(TokenDispatcherWithAll2AllV(**kwargs))
     elif ep_size >= 16:
```
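To make the branch ordering explicit, here is a standalone paraphrase of the selection precedence, not repository code; the `ep_size >= 16` result is an assumption, since the hunk ends at that branch and the `get_mc2_group` import only suggests an MC2 dispatcher handles it:

```python
# Standalone sketch of the dispatcher-selection precedence added above.
def select_dispatcher(ep_size: int, fused_allgather_ep: bool) -> str:
    if ep_size == 1:
        return "TokenDispatcherWithAllGather"
    elif fused_allgather_ep and ep_size > 1:
        # New branch from this PR: the env flag forces the all-gather
        # dispatcher even under expert parallelism.
        return "TokenDispatcherWithAllGather"
    elif ep_size < 16:
        return "TokenDispatcherWithAll2AllV"
    else:
        return "TokenDispatcherWithMC2"  # assumed; not shown in the hunk


# Spot checks of the precedence.
assert select_dispatcher(1, False) == "TokenDispatcherWithAllGather"
assert select_dispatcher(8, True) == "TokenDispatcherWithAllGather"  # flag wins
assert select_dispatcher(8, False) == "TokenDispatcherWithAll2AllV"
assert select_dispatcher(16, False) == "TokenDispatcherWithMC2"
```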