[EPLB] Display the expert hotness comparison before and after eplb. (#6877)
### What this PR does / why we need it?
To intuitively show the effect of the EPLB algorithm, we print the
expert hotness (per-layer load statistics) before and after EPLB rebalancing.
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?

- vLLM version: v0.16.0
- vLLM main:
15d76f74e2
Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
This commit is contained in:
@@ -34,7 +34,6 @@ class EplbUpdator:
|
||||
self.eplb_loader = loader
|
||||
self.eplb_process = eplb_process
|
||||
self.shared_dict = self.eplb_process.shared_dict
|
||||
self.moe_imbalance_dict: dict[int, float] = {}
|
||||
self.comm_group = get_dynamic_eplb_group()
|
||||
|
||||
def set_adaptor(self, adaptor: VllmEplbAdaptor):
|
||||
@@ -137,44 +136,8 @@ class EplbUpdator:
|
||||
self.shared_dict["moe_load"] = moe_load.cpu()
|
||||
logger.debug(f"[ModelRunner] Updated shared_dict['moe_load'] shape={moe_load.shape}")
|
||||
|
||||
if dist.get_rank() == 0:
|
||||
self.compute_moe_imbalance(moe_load)
|
||||
self.summarize_moe_imbalance()
|
||||
|
||||
return moe_load
|
||||
|
||||
def compute_moe_imbalance(self, moe_load: torch.Tensor):
    """Compute the per-layer Peak-to-Average-Ratio (PAR) of MoE load.

    Sums ``moe_load`` over its last dimension, then for each layer
    records ``max / mean`` of the resulting per-layer load vector into
    ``self.moe_imbalance_dict`` (keyed by layer index). A PAR close to
    1.0 means the load is well balanced.

    Args:
        moe_load: expert-load tensor; the leading dimension indexes
            layers and the trailing dimension is reduced away.
            NOTE(review): presumably shaped (num_layers, num_cards,
            num_experts) so max/mean run over cards — confirm against
            the caller.
    """
    self.moe_imbalance_dict.clear()

    # Reduce and move to CPU once, outside the per-layer loop.
    layer_card_load = moe_load.sum(dim=-1).cpu().float()

    for layer_idx in range(layer_card_load.size(0)):
        layer_load = layer_card_load[layer_idx]

        mean_load = layer_load.mean().item()
        max_load = layer_load.max().item()

        # Epsilon guards against division by zero on an idle layer.
        moe_load_imbalance = max_load / (mean_load + 1e-6)

        # Lazy %-style args: the message is only formatted when DEBUG
        # logging is actually enabled, avoiding per-layer string work
        # on the hot path (the original used an eager f-string).
        logger.debug(
            "[ModelRunner][MOE_load_stats][Layer %d] PAR=%.4f",
            layer_idx, moe_load_imbalance)

        self.moe_imbalance_dict[layer_idx] = moe_load_imbalance
|
||||
|
||||
def summarize_moe_imbalance(self):
    """Log aggregate PAR statistics over all layers.

    Reads the per-layer Peak-to-Average-Ratio values collected in
    ``self.moe_imbalance_dict`` and emits their mean, max and min at
    INFO level; logs a placeholder message when no data was collected.
    """
    pars = list(self.moe_imbalance_dict.values())

    # Nothing collected yet — say so and bail out early.
    if not pars:
        logger.info("[MOE_load_stats] No data available.")
        return

    total = sum(pars)
    count = len(pars)

    logger.info(
        f"[ModelRunner][MOE_load_stats] Peak-to-Average-Ratio: "
        f"Mean={total / count:.4f}, Max={max(pars):.4f}, Min={min(pars):.4f}"
    )
|
||||
|
||||
def warm_up_eplb(self):
    """Warm-up step: publish the global expert map and compute MoE load.

    Copies the adaptor's current global expert map into the shared dict
    (making it visible to the EPLB process), then calls
    ``compute_and_set_moe_load`` to record the current MoE load.
    """
    self.shared_dict["expert_maps"] = self.adaptor.get_global_expert_map()
    # NOTE(review): presumably this captures the pre-rebalance load
    # statistics; the method is defined elsewhere in this class.
    self.compute_and_set_moe_load()
|
||||
|
||||
Reference in New Issue
Block a user