[EPLB] Record expert map without dynamic EPLB. (#3409)

What this PR does / why we need it? 1. Record the expert map even when dynamic EPLB is not enabled. 2. Add `export PYTHONOPTIMIZE=1` when using dynamic EPLB. 3. Update the EPLB documentation. Does this PR introduce any user-facing change? How was this patch tested? Tested with Qwen3_moe on A3. - vLLM version: v0.11.0 --------- Signed-off-by: offline0806 <3337230449@qq.com> Co-authored-by: offline0806 <3337230449@qq.com>
2025-10-15 14:21:15 +08:00
parent 4f937f561d
commit 5a3082cd15
9 changed files with 49 additions and 15 deletions
--- a/vllm_ascend/quantization/w4a8_dynamic.py
+++ b/vllm_ascend/quantization/w4a8_dynamic.py
@@ -140,7 +140,8 @@ class AscendW4A8DynamicFusedMoEMethod:
        # NOTE: new quantize weights: 2 int4 pack into int8
        self.new_quant_version = quant_version == "1.0.0"
        self.tp_size = 1 if vllm_config.parallel_config.enable_expert_parallel else self.ep_group.world_size
-        self.dynamic_eplb = get_ascend_config().dynamic_eplb
+        ascend_config = get_ascend_config()
+        self.dynamic_eplb = ascend_config.dynamic_eplb or ascend_config.expert_map_record_path
        if self.new_quant_version and self.tp_size > 16:
            raise ValueError(
                "The current weight does not support moe part tp>16.")
--- a/vllm_ascend/quantization/w8a8_dynamic.py
+++ b/vllm_ascend/quantization/w8a8_dynamic.py
@@ -124,7 +124,7 @@ class AscendW8A8DynamicFusedMoEMethod:
            vllm_config.compilation_config.level == CompilationLevel.PIECEWISE
            and not vllm_config.model_config.enforce_eager
            and not ascend_config.torchair_graph_config.enabled)
-        self.dynamic_eplb = ascend_config.dynamic_eplb
+        self.dynamic_eplb = ascend_config.dynamic_eplb or ascend_config.expert_map_record_path

        try:
            device_group = get_mc2_group().device_group