eplb redundant expert bugfix (#4291)
### What this PR does / why we need it?
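Fix redundant-expert handling in EPLB: when force load balancing is enabled,
random expert IDs are now drawn from the range
`[0, global_num_experts - global_redundant_expert_num)`, and
`enable_force_load_balance` now defaults to `False`.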
### Does this PR introduce _any_ user-facing change?
After configuring the path to the expert map, users no longer need to
configure `init_redundancy_expert`.
### How was this patch tested?
The accuracy of EPLB was tested with and without the use of redundant
experts.
- vLLM version: v0.11.0
- vLLM main:
2918c1b49c
---------
Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
This commit is contained in:
@@ -342,7 +342,7 @@ class AscendW4A8DynamicFusedMoEMethod:
|
||||
scoring_func: str = "softmax",
|
||||
e_score_correction_bias: Optional[torch.Tensor] = None,
|
||||
is_prefill: bool = True,
|
||||
enable_force_load_balance: bool = True,
|
||||
enable_force_load_balance: bool = False,
|
||||
log2phy: torch.Tensor = None,
|
||||
global_redundant_expert_num: int = 0,
|
||||
shared_experts: Optional[Any] = None,
|
||||
@@ -371,7 +371,8 @@ class AscendW4A8DynamicFusedMoEMethod:
|
||||
# to avoid accumulating too much tokens on a single rank.
|
||||
# currently it is only activated when doing profile runs.
|
||||
if enable_force_load_balance:
|
||||
topk_ids = torch.randint_like(topk_ids, 0, global_num_experts)
|
||||
topk_ids = torch.randint_like(
|
||||
topk_ids, 0, global_num_experts - global_redundant_expert_num)
|
||||
|
||||
topk_weights = topk_weights.to(x.dtype)
|
||||
|
||||
|
||||
@@ -213,7 +213,7 @@ class AscendW8A8DynamicFusedMoEMethod:
|
||||
scoring_func: str = "softmax",
|
||||
e_score_correction_bias: Optional[torch.Tensor] = None,
|
||||
is_prefill: bool = True,
|
||||
enable_force_load_balance: bool = True,
|
||||
enable_force_load_balance: bool = False,
|
||||
log2phy: torch.Tensor = None,
|
||||
global_redundant_expert_num: int = 0,
|
||||
shared_experts: Optional[Any] = None,
|
||||
@@ -242,7 +242,8 @@ class AscendW8A8DynamicFusedMoEMethod:
|
||||
# to avoid accumulating too much tokens on a single rank.
|
||||
# currently it is only activated when doing profile runs.
|
||||
if enable_force_load_balance:
|
||||
topk_ids = torch.randint_like(topk_ids, 0, global_num_experts)
|
||||
topk_ids = torch.randint_like(
|
||||
topk_ids, 0, global_num_experts - global_redundant_expert_num)
|
||||
|
||||
topk_weights = topk_weights.to(self.in_dtype)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user