[EPLB] Display the expert hotness comparison before and after eplb. (#6877)

### What this PR does / why we need it?
To intuitively show the effect of the EPLB algorithm, we log the
expert hotness before and after rebalancing.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

![Snipaste_2026-02-28_17-23-42](https://github.com/user-attachments/assets/db1dadd1-cf96-44da-af34-57d41ccf412f)


- vLLM version: v0.16.0
- vLLM main:
15d76f74e2

Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
This commit is contained in:
LI SHENGYONG
2026-03-06 09:53:29 +08:00
committed by GitHub
parent 18b52afe2b
commit ccd00798f3
3 changed files with 42 additions and 38 deletions

View File

@@ -17,6 +17,7 @@
from multiprocessing import Process, Queue
from typing import Any
import numpy as np
import torch
import torch.distributed as dist
from vllm.logger import logger
@@ -60,6 +61,16 @@ class EplbWorker:
old_placement = self.global2local(self.old_expert_maps, self.num_local_experts)
_, _, new_placement = self.calculate_rebalance_experts(load_info, old_placement)
if self.rank_id == 0:
hotness = self._calculate_hotness(old_placement, load_info)
current_mean, current_max = self._compute_imbalance(old_placement, hotness)
update_mean, update_max = self._compute_imbalance(new_placement, hotness)
logger.info(
"[Expert Hotness] Current: mean={:.3f}, max={:.3f}, Updated: mean={:.3f}, max={:.3f}".format(
current_mean, current_max, update_mean, update_max
)
)
if not torch.is_tensor(new_placement):
new_placement = torch.tensor(new_placement)
self.check_expert_placement(old_placement, new_placement)
@@ -251,6 +262,36 @@ class EplbWorker:
return list(zip(send_all, recv_all, maps, log2phy_all, layer_ids))
@staticmethod
def _compute_imbalance(deployment_all_layer, hotness_all_layer: np.ndarray):
imbalance_list = []
deployment_all_layer = np.array(deployment_all_layer)
for deployment, hotness in zip(deployment_all_layer, hotness_all_layer):
counts = np.bincount(deployment.reshape(-1), minlength=hotness.shape[0])
unit_hotness = np.divide(hotness, counts, out=np.zeros_like(hotness, dtype=float), where=counts != 0)
stage_load = unit_hotness[deployment].sum(-1)
stage_par = stage_load.max() / stage_load.mean()
imbalance_list.append(stage_par)
max_val = max(imbalance_list)
mean_val = sum(imbalance_list) / len(imbalance_list)
return mean_val, max_val
@staticmethod
def _calculate_hotness(deployment_all_layer, moe_load_all_layer):
hotnesses = []
num_of_expert = deployment_all_layer.shape[1] * deployment_all_layer.shape[2]
for deployment, rank_load in zip(deployment_all_layer, moe_load_all_layer.numpy()):
hotness = np.zeros(num_of_expert, dtype=rank_load.dtype)
deployment_flat = deployment.ravel()
rank_load_flat = rank_load.ravel()
np.add.at(hotness, deployment_flat, rank_load_flat)
hotnesses.append(hotness)
return np.array(hotnesses)
class EplbProcess:
def __init__(self, shared_dict, policy_type: int = 0, enable_d2d: bool = True):