enable ep32 for dispatch_ffn_combine (#5787)
### What this PR does / why we need it? Enables EP size 32 support for the dispatch_ffn_combine operator. ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? Tested with a single-operator test. --------- Signed-off-by: lhchg <lhao_cheng@163.com>
This commit is contained in:
@@ -244,7 +244,7 @@ def select_moe_comm_method(num_tokens: int,
|
||||
# TODO: drop the EP-size guard when dispatch_ffn_combine supports larger EP sizes
|
||||
# TODO: drop speculative method guard when dispatch_gmm_combine_decode supports w16a16
|
||||
fused_mc2_enable = envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2 and quant_type == "w8a8_dynamic"
|
||||
dispatch_ffn_combine_enable = get_ep_group().world_size <= 16 and (
|
||||
dispatch_ffn_combine_enable = get_ep_group().world_size <= 32 and (
|
||||
not is_draft_model) and (not dynamic_eplb)
|
||||
if num_tokens <= mc2_tokens_capacity:
|
||||
fused_decode_enable = fused_mc2_enable
|
||||
|
||||
@@ -123,7 +123,7 @@ env_variables: Dict[str, Callable[[], Any]] = {
|
||||
# Whether to enable fused mc2(`dispatch_gmm_combine_decode`/`dispatch_ffn_combine` operator)
|
||||
# 0, or not set: default ALLTOALL and MC2 will be used.
|
||||
# 1: ALLTOALL and MC2 might be replaced by `dispatch_ffn_combine` operator.
|
||||
# `dispatch_ffn_combine` can be used only for moe layer with W8A8, EP<=16, non-mtp, non-dynamic-eplb.
|
||||
# `dispatch_ffn_combine` can be used only for moe layer with W8A8, EP<=32, non-mtp, non-dynamic-eplb.
|
||||
# 2: MC2 might be replaced by `dispatch_gmm_combine_decode` operator.
|
||||
# `dispatch_gmm_combine_decode` can be used only for **decode node** moe layer
|
||||
# with W8A8. And MTP layer must be W8A8.
|
||||
|
||||
Reference in New Issue
Block a user