[EPLB][Bugfix] Dispatch Allgather use log2phy if enable eplb (#5933)

### What this PR does / why we need it?
1. Move the logic of expert mapping forward to prevent shotgun changes
2. Disable the update of expert map.

### How was this patch tested?
a2
| dataset | version | metric | mode | vllm-api-general-chat |
|----- | ----- | ----- | ----- | -----|
| GPQA_diamond | 53064e | accuracy | gen | 73.23 |

a3
| dataset | version | metric | mode | vllm-api-general-chat |
|----- | ----- | ----- | ----- | -----|
| aime2024 | 604a78 | accuracy | gen | 83.33 |


- vLLM version: v0.13.0
- vLLM main:
11b6af5280

Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
This commit is contained in:
LI SHENGYONG
2026-01-19 09:24:25 +08:00
committed by GitHub
parent 9fed2636cb
commit bc1f6713e7
6 changed files with 6 additions and 53 deletions

View File

@@ -80,7 +80,6 @@ class MoETokenDispatcher(ABC):
topk_weights: torch.Tensor,
topk_ids: torch.Tensor,
expert_map: Optional[torch.Tensor] = None,
log2phy: Optional[torch.Tensor] = None,
global_redundant_expert_num: int = 0,
mc2_mask: Optional[torch.Tensor] = None,
apply_router_weight_on_input: bool = False,
@@ -188,7 +187,6 @@ class TokenDispatcherWithMC2(MoETokenDispatcher):
topk_weights: torch.Tensor,
topk_ids: torch.Tensor,
expert_map: Optional[torch.Tensor] = None,
log2phy: Optional[torch.Tensor] = None,
global_redundant_expert_num: int = 0,
mc2_mask: Optional[torch.Tensor] = None,
apply_router_weight_on_input: bool = False,
@@ -197,10 +195,6 @@ class TokenDispatcherWithMC2(MoETokenDispatcher):
pertoken_scale: Optional[torch.Tensor] = None):
self.with_quant = with_quant
# Apply log2phy if needed
if log2phy is not None:
topk_ids = log2phy[topk_ids]
kwargs_mc2 = self.get_dispatch_mc2_kwargs(hidden_states, topk_weights,
topk_ids, expert_map,
mc2_mask,
@@ -309,7 +303,6 @@ class TokenDispatcherWithAllGather(MoETokenDispatcher):
topk_weights: torch.Tensor,
topk_ids: torch.Tensor,
expert_map: Optional[torch.Tensor] = None,
log2phy: Optional[torch.Tensor] = None,
global_redundant_expert_num: int = 0,
mc2_mask: Optional[torch.Tensor] = None,
apply_router_weight_on_input: bool = False,
@@ -429,7 +422,6 @@ class TokenDispatcherWithAll2AllV(MoETokenDispatcher):
topk_weights: torch.Tensor,
topk_ids: torch.Tensor,
expert_map: Optional[torch.Tensor] = None,
log2phy: Optional[torch.Tensor] = None,
global_redundant_expert_num: int = 0,
mc2_mask: Optional[torch.Tensor] = None,
apply_router_weight_on_input: bool = False,
@@ -439,9 +431,6 @@ class TokenDispatcherWithAll2AllV(MoETokenDispatcher):
self.with_quant = with_quant
self.hidden_shape = hidden_states.shape
if log2phy is not None:
topk_ids = log2phy[topk_ids]
(
permutated_local_input_tokens,
reversed_local_input_permutation_mapping,