[EPLB][bugfix] Bugfix for fused mc2 (#6794)

### What this PR does / why we need it?
This pull request fixes a bug in the fused MC2 functionality
within the EPLB (Expert Parallelism Load Balancing) system, which
affected quantization and MoE communication.
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.15.0
- vLLM main: 83b47f67b1

Signed-off-by: Spicy-Stick <873805887@qq.com>
Signed-off-by: root <root@localhost.localdomain>
This commit is contained in:
JIACHENG XU
2026-03-09 11:26:57 +08:00
committed by GitHub
parent 06ec136f08
commit 23bf5d4d48
5 changed files with 50 additions and 28 deletions

View File

@@ -70,7 +70,7 @@ class FusedExpertsResult:
before_dispatch_evt: torch.npu.Event | None = None
before_combine_evt: torch.npu.Event | None = None
# For dynamic_eplb
group_list_type: int | None = None
group_list_type: int = 1
expert_tokens: torch.Tensor | None = None
@@ -355,7 +355,6 @@ class FusedMC2CommImpl(MoECommMethod):
if log2phy is not None:
topk_ids = log2phy[topk_ids]
group_list_type = None
expert_tokens = None
if envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2 == 1:
out = torch.empty_like(hidden_states)
@@ -375,7 +374,6 @@ class FusedMC2CommImpl(MoECommMethod):
expert_tokens = self.expert_token_nums
elif envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2 == 2:
assert expert_map is not None, "expert_map cannot be None."
group_list_type = 1
out, expert_tokens = torch.ops._C_ascend.dispatch_gmm_combine_decode( # type: ignore
x=hidden_states,
expert_ids=topk_ids,
@@ -393,4 +391,4 @@ class FusedMC2CommImpl(MoECommMethod):
)
else:
raise ValueError(f"Wrong value of {envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2=}")
return FusedExpertsResult(routed_out=out, group_list_type=group_list_type, expert_tokens=expert_tokens)
return FusedExpertsResult(routed_out=out, expert_tokens=expert_tokens)