[EPLB]Record expert map without dynamic eplb. (#3409)
What this PR does / why we need it? 1. Record the expert map even when dynamic EPLB is disabled. 2. Add `export PYTHONOPTIMIZE=1` when using dynamic EPLB. 3. Update the EPLB documentation. Does this PR introduce any user-facing change? No. How was this patch tested? Tested with Qwen3_moe on A3. - vLLM version: v0.11.0 --------- Signed-off-by: offline0806 <3337230449@qq.com> Co-authored-by: offline0806 <3337230449@qq.com>
This commit is contained in:
@@ -134,6 +134,7 @@ class TorchairAscendW4A8DynamicFusedMoEMethod:
|
||||
self.ep_group = get_ep_group()
|
||||
|
||||
ascend_config = get_ascend_config()
|
||||
self.dynamic_eplb = ascend_config.dynamic_eplb or ascend_config.expert_map_record_path
|
||||
self.torchair_graph_enabled = ascend_config.torchair_graph_config.enabled
|
||||
|
||||
vllm_config = get_current_vllm_config()
|
||||
@@ -336,7 +337,8 @@ class TorchairAscendW4A8DynamicFusedMoEMethod:
|
||||
is_torchair=self.torchair_graph_enabled,
|
||||
quantized_x_for_share=shared_gate_up,
|
||||
dynamic_scale_for_share=shared_dequant_scale,
|
||||
mc2_mask=kwargs.get("mc2_mask", None))
|
||||
mc2_mask=kwargs.get("mc2_mask", None),
|
||||
dynamic_eplb=self.dynamic_eplb)
|
||||
else:
|
||||
# The current implementation of deepseek moe splits hidden_states
|
||||
# according to tp_size before they are feed into layers module.
|
||||
|
||||
@@ -848,7 +848,7 @@ class TorchairAscendW8A8DynamicFusedMoEMethod:
|
||||
self.ep_group = get_ep_group()
|
||||
|
||||
ascend_config = get_ascend_config()
|
||||
self.dynamic_eplb = ascend_config.dynamic_eplb
|
||||
self.dynamic_eplb = ascend_config.dynamic_eplb or ascend_config.expert_map_record_path
|
||||
self.torchair_graph_enabled = ascend_config.torchair_graph_config.enabled
|
||||
self.enable_shared_expert_dp = ascend_config.enable_shared_expert_dp
|
||||
|
||||
|
||||
Reference in New Issue
Block a user