[EPLB]Record expert map without dynamic eplb. (#3409)

What this PR does / why we need it? 1. Record the expert map without dynamic EPLB. 2. Add export PYTHONOPTIMIZE=1 when using dynamic EPLB. 3. Update the EPLB doc. Does this PR introduce any user-facing change? How was this patch tested? Tested with Qwen3_moe on A3. - vLLM version: v0.11.0 --------- Signed-off-by: offline0806 <3337230449@qq.com> Co-authored-by: offline0806 <3337230449@qq.com>
2025-10-15 14:21:15 +08:00
parent 4f937f561d
commit 5a3082cd15
9 changed files with 49 additions and 15 deletions
--- a/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py
+++ b/vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py
@@ -134,6 +134,7 @@ class TorchairAscendW4A8DynamicFusedMoEMethod:
        self.ep_group = get_ep_group()

        ascend_config = get_ascend_config()
+        self.dynamic_eplb = ascend_config.dynamic_eplb or ascend_config.expert_map_record_path
        self.torchair_graph_enabled = ascend_config.torchair_graph_config.enabled

        vllm_config = get_current_vllm_config()
@@ -336,7 +337,8 @@ class TorchairAscendW4A8DynamicFusedMoEMethod:
                is_torchair=self.torchair_graph_enabled,
                quantized_x_for_share=shared_gate_up,
                dynamic_scale_for_share=shared_dequant_scale,
-                mc2_mask=kwargs.get("mc2_mask", None))
+                mc2_mask=kwargs.get("mc2_mask", None),
+                dynamic_eplb=self.dynamic_eplb)
        else:
            # The current implementation of deepseek moe splits hidden_states
            # according to tp_size before they are feed into layers module.
--- a/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py
+++ b/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py
@@ -848,7 +848,7 @@ class TorchairAscendW8A8DynamicFusedMoEMethod:
        self.ep_group = get_ep_group()

        ascend_config = get_ascend_config()
-        self.dynamic_eplb = ascend_config.dynamic_eplb
+        self.dynamic_eplb = ascend_config.dynamic_eplb or ascend_config.expert_map_record_path
        self.torchair_graph_enabled = ascend_config.torchair_graph_config.enabled
        self.enable_shared_expert_dp = ascend_config.enable_shared_expert_dp