[EPLB][Bugfix] policy_swift_balancer bugfix and renaming (#5897)

### What this PR does / why we need it?
1. Rename dynamic_ep to default_eplb.
2. Rename dynamic_ep_v2 to swift_balancer.
3. Remove the function `compose_expert_update_info_bipartite`.

- vLLM version: v0.13.0
- vLLM main:
bde38c11df

Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
This commit is contained in:
LI SHENGYONG
2026-01-19 13:47:40 +08:00
committed by GitHub
parent b27774dbd6
commit 83de5385b4
8 changed files with 39 additions and 149 deletions

View File

@@ -73,12 +73,8 @@ class EplbWorker:
new_expert_maps = self.local2global(new_placement)
self.update_expert_map(new_expert_maps)
if self.policy_type == 2:
update_info = self.compose_expert_update_info_bipartite(
new_expert_maps, self.old_expert_maps)
else:
update_info = self.compose_expert_update_info_greedy(
new_expert_maps, self.old_expert_maps)
update_info = self.compose_expert_update_info_greedy(
new_expert_maps, self.old_expert_maps)
self.old_expert_maps = new_expert_maps
logger.info("EPLB Process compute complete")
@@ -124,112 +120,6 @@ class EplbWorker:
new_placement[layer_id] = old_placement[layer_id]
break
def compose_expert_update_info_bipartite(self, updated_expert_maps_org,
                                         current_expert_maps_org):
    """Yield a per-layer expert-weight transfer plan via bipartite matching.

    For every MoE layer, compares the updated expert placement against the
    current one and schedules which source rank sends which expert to which
    destination rank, using rounds of maximum bipartite matching so each
    rank participates in at most one transfer per round.

    Args:
        updated_expert_maps_org: tensor of shape (num_layers, num_ranks,
            num_experts); entry == -1 means the expert is not hosted on
            that rank. The target placement.
        current_expert_maps_org: same shape/convention; current placement.

    Yields:
        Tuples ``(expert_send_info, expert_recv_info,
        updated_expert_maps_this_layer_org, layer_id)`` — exactly one per
        layer. ``expert_send_info`` maps src_rank -> [(dst_rank, expert_id)],
        ``expert_recv_info`` maps dst_rank -> [(src_rank, expert_id)].
    """
    # Work on copies so the callers' tensors stay untouched; numpy is used
    # for the boolean-mask bookkeeping below.
    updated_expert_maps = np.array(updated_expert_maps_org.clone())
    current_expert_maps = np.array(current_expert_maps_org.clone())

    num_layers = current_expert_maps.shape[0]

    for layer_id in range(num_layers):
        updated_expert_maps_this_layer = updated_expert_maps[layer_id]
        current_expert_maps_this_layer = current_expert_maps[layer_id]
        updated_expert_maps_this_layer_org = updated_expert_maps_org[
            layer_id]

        expert_send_info_this_layer: dict = {}
        expert_recv_info_this_layer: dict = {}

        # Guard clause: nothing changed in this layer — emit an empty plan
        # and move on. (Bugfix: the original fell through after this yield
        # and emitted a second, duplicate plan for the same layer.)
        if (np.equal(updated_expert_maps_this_layer,
                     current_expert_maps_this_layer)).all():
            yield (expert_send_info_this_layer, expert_recv_info_this_layer,
                   updated_expert_maps_this_layer_org, layer_id)
            continue

        # Pending transfers: (rank, expert) pairs hosted after the update
        # but not currently present on that rank.
        dst_rank_indices, experts_to_recv = np.where(
            (current_expert_maps_this_layer == -1)
            & (updated_expert_maps_this_layer != -1))

        # Candidate source ranks for every expert that has to move.
        src_ranks_set = dict()
        for idx in range(len(dst_rank_indices)):
            expert_id = experts_to_recv[idx].item()
            if expert_id not in src_ranks_set:
                src_ranks_set[expert_id] = np.where(
                    current_expert_maps_this_layer[:, expert_id] != -1)[0]

        # Run rounds of maximum bipartite matching until every pending
        # (dst_rank, expert) pair has an assigned sender.
        while len(dst_rank_indices) > 0:
            # Build the bipartite graph: integer nodes are source ranks
            # (bipartite=0), string nodes are destination ranks
            # (bipartite=1). The str() wrapping keeps a rank that is both
            # sender and receiver from collapsing into one node.
            graph_expert_update: nx.Graph = nx.Graph()
            for idx in range(len(dst_rank_indices)):
                dst_rank_id = dst_rank_indices[idx].item()
                expert_id = experts_to_recv[idx].item()
                src_rank_ids = src_ranks_set[expert_id]
                graph_expert_update.add_nodes_from(src_rank_ids,
                                                   bipartite=0)
                graph_expert_update.add_node(str(dst_rank_id), bipartite=1)
                for src_rank_id in src_rank_ids:
                    graph_expert_update.add_edge(src_rank_id,
                                                 str(dst_rank_id))

            # The graph may be disconnected; match each component on its
            # own (maximum_matching requires a connected bipartite graph
            # to infer the node sets).
            connected_components = list(
                nx.connected_components(graph_expert_update))
            all_matches = {}
            for component in connected_components:
                subgraph = graph_expert_update.subgraph(component)
                all_matches.update(nx.bipartite.maximum_matching(subgraph))

            for src_rank, dst_rank in all_matches.items():
                dst_rank = int(dst_rank)
                assert src_rank != dst_rank
                # maximum_matching returns both orientations; keep only the
                # src (bipartite=0) -> dst (bipartite=1) direction.
                if graph_expert_update.nodes[src_rank]['bipartite'] == 0:
                    # Experts still pending for this destination rank.
                    experts_v = experts_to_recv[np.where(
                        dst_rank_indices == dst_rank)]
                    # Pick one pending expert that src_rank actually hosts.
                    expert_id = np.intersect1d(
                        experts_v,
                        np.where(current_expert_maps_this_layer[src_rank]
                                 != -1))[0]

                    # Record the send/recv pair on both sides of the plan.
                    expert_send_info_this_layer.setdefault(
                        src_rank, []).append((dst_rank, expert_id))
                    expert_recv_info_this_layer.setdefault(
                        dst_rank, []).append((src_rank, expert_id))

                    # Drop the scheduled pair from the pending lists so the
                    # next matching round only sees unscheduled transfers.
                    remove_index = np.where(
                        np.logical_and(dst_rank_indices == dst_rank,
                                       experts_to_recv == expert_id))
                    dst_rank_indices = np.delete(dst_rank_indices,
                                                 remove_index)
                    experts_to_recv = np.delete(experts_to_recv,
                                                remove_index)

        yield (expert_send_info_this_layer, expert_recv_info_this_layer,
               updated_expert_maps_this_layer_org, layer_id)
# TODO: Here only expert weight exchange is considered, need to be extended to cover other weight update cases
def compose_expert_update_info_greedy(self, updated_expert_maps,
current_expert_maps):

View File

@@ -24,7 +24,7 @@ class DynamicTable:
placement_table = None
class DynamicEplb(EplbPolicy):
class DefaultEplb(EplbPolicy):
def __init__(self, config: DynamicConfig):
super().__init__(config)

View File

@@ -1,8 +1,8 @@
# Copyright Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
# Todo: Once https://github.com/vllm-project/vllm/pull/24069 is merged in vllm. Remove this factory.
from .policy_abstract import DynamicConfig, EplbPolicy
from .policy_dynamic_ep import DynamicEplb
from .policy_dynamic_ep_v2 import DynamicEplbV2
from .policy_default_eplb import DefaultEplb
from .policy_swift_balancer import SwiftBalanceEplb
from .policy_flashlb import FlashLB, warm_up
from .policy_random import RandomLoadBalance
@@ -20,9 +20,9 @@ class PolicyFactory:
0:
RandomLoadBalance, # RandomLoadBalance: shuffle last physical expert on NPU 1 and 3
1:
DynamicEplb, # Dynamic EPLB policy: overall expert replacement based on current moe load
DefaultEplb, # Dynamic EPLB policy: overall expert replacement based on current moe load
2:
DynamicEplbV2, # Dynamic EPLB policy V2: expert replacement with constrained number of expert shuffle
SwiftBalanceEplb, # Dynamic EPLB policy V2: expert replacement with constrained number of expert shuffle
3:
FlashLB, # FlashLB EPLB policy: expert replacement based on Joint Optimization, Multi-Shot Enhancement and Incremental Adjustment
}

View File

@@ -62,7 +62,7 @@ class DynamicTable:
placement_table = None
class DynamicEplbV2(EplbPolicy):
class SwiftBalanceEplb(EplbPolicy):
def __init__(self, config: DynamicConfig):
super().__init__(config)