[EPLB] Display the expert hotness comparison before and after eplb. (#6877)

### What this PR does / why we need it?
To intuitively show the effect of the eplb algorithm, this PR prints the
expert hotness before and after eplb rebalancing.
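For intuition, here is a minimal, self-contained sketch of the kind of per-layer hotness metric involved (the function name `layer_par`, the tensor shapes, and the sample numbers are illustrative assumptions, not this PR's actual code):

```python
import torch

def layer_par(moe_load: torch.Tensor) -> torch.Tensor:
    """Peak-to-Average Ratio (PAR) of per-card load for each MoE layer.

    moe_load: [num_layers, num_cards, num_experts_per_card] token counts.
    Returns a [num_layers] tensor; 1.0 means perfectly balanced.
    """
    card_load = moe_load.sum(dim=-1).float()   # total load per card: [layers, cards]
    mean = card_load.mean(dim=-1)              # average card load per layer
    peak = card_load.max(dim=-1).values        # hottest card per layer
    return peak / (mean + 1e-6)                # epsilon guards against empty layers

# Illustrative numbers: one layer, three cards, two experts per card.
load_before = torch.tensor([[[90., 10.], [30., 20.], [25., 25.]]])
load_after = torch.tensor([[[40., 30.], [35., 30.], [35., 30.]]])
print(f"PAR before eplb: {layer_par(load_before)[0].item():.4f}")  # ~1.5000
print(f"PAR after  eplb: {layer_par(load_after)[0].item():.4f}")   # ~1.0500
```

A PAR of 1.0 means every card carries the same load; the closer the post-eplb PAR is to 1.0, the better the rebalance.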

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

![Snipaste_2026-02-28_17-23-42](https://github.com/user-attachments/assets/db1dadd1-cf96-44da-af34-57d41ccf412f)


- vLLM version: v0.16.0
- vLLM main: 15d76f74e2

Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
Author: LI SHENGYONG (committed by GitHub)
Date: 2026-03-06 09:53:29 +08:00
Parent: 18b52afe2b
Commit: ccd00798f3
3 changed files with 42 additions and 38 deletions


```diff
@@ -34,7 +34,6 @@ class EplbUpdator:
         self.eplb_loader = loader
         self.eplb_process = eplb_process
         self.shared_dict = self.eplb_process.shared_dict
-        self.moe_imbalance_dict: dict[int, float] = {}
         self.comm_group = get_dynamic_eplb_group()
 
     def set_adaptor(self, adaptor: VllmEplbAdaptor):
@@ -137,44 +136,8 @@ class EplbUpdator:
         self.shared_dict["moe_load"] = moe_load.cpu()
         logger.debug(f"[ModelRunner] Updated shared_dict['moe_load'] shape={moe_load.shape}")
-        if dist.get_rank() == 0:
-            self.compute_moe_imbalance(moe_load)
-            self.summarize_moe_imbalance()
         return moe_load
 
-    def compute_moe_imbalance(self, moe_load: torch.Tensor):
-        self.moe_imbalance_dict.clear()
-        layer_card_load = moe_load.sum(dim=-1).cpu().float()
-        for layer_idx in range(layer_card_load.size(0)):
-            layer_load = layer_card_load[layer_idx]
-            mean_load = layer_load.mean().item()
-            max_load = layer_load.max().item()
-            moe_load_imbalance = max_load / (mean_load + 1e-6)
-            logger.debug(f"[ModelRunner][MOE_load_stats][Layer {layer_idx}] PAR={moe_load_imbalance:.4f}")
-            self.moe_imbalance_dict[layer_idx] = moe_load_imbalance
-
-    def summarize_moe_imbalance(self):
-        values = list(self.moe_imbalance_dict.values())
-        if not values:
-            logger.info("[MOE_load_stats] No data available.")
-            return
-        avg_imbalance = sum(values) / len(values)
-        max_imbalance = max(values)
-        min_imbalance = min(values)
-        logger.info(
-            f"[ModelRunner][MOE_load_stats] Peak-to-Average-Ratio: "
-            f"Mean={avg_imbalance:.4f}, Max={max_imbalance:.4f}, Min={min_imbalance:.4f}"
-        )
 
     def warm_up_eplb(self):
         self.shared_dict["expert_maps"] = self.adaptor.get_global_expert_map()
         self.compute_and_set_moe_load()
```
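For reference, the `summarize_moe_imbalance` method shown in the diff boils the per-layer PAR values down to a single summary line. Below is a minimal standalone sketch of that aggregation step (the dict contents are made-up example values):

```python
def summarize_par(par_by_layer: dict[int, float]) -> str:
    """Condense per-layer Peak-to-Average Ratios into one summary line,
    mirroring the aggregation in summarize_moe_imbalance."""
    values = list(par_by_layer.values())
    if not values:
        return "[MOE_load_stats] No data available."
    avg = sum(values) / len(values)
    return (f"[MOE_load_stats] Peak-to-Average-Ratio: "
            f"Mean={avg:.4f}, Max={max(values):.4f}, Min={min(values):.4f}")

# Example: layer 0 is noticeably hotter than the other two layers.
print(summarize_par({0: 2.31, 1: 1.12, 2: 1.05}))
# -> [MOE_load_stats] Peak-to-Average-Ratio: Mean=1.4933, Max=2.3100, Min=1.0500
```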