[EPLB][Bugfix] policy_swift_balancer bugfix and renaming (#5897)
### What this PR does / why we need it?
1. Rename dynamic_ep to default_eplb.
2. Rename dynamic_ep_v2 to swift_balancer.
3. Discard the function compose_expert_update_info_bipartite.
- vLLM version: v0.13.0
- vLLM main: bde38c11df
Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
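
For reviewers, a minimal sketch of how the renamed policies are resolved through the factory after this change. The module paths and the `generate_policy` entry point are assumptions based on the surrounding vllm-ascend tree, not guaranteed by this diff; the id-to-class mapping itself comes from the factory hunk below.

```python
# Sketch only: selecting the renamed EPLB policies by integer id.
# Module paths and the generate_policy classmethod are assumed here;
# adjust to the real signatures in the tree.
from vllm_ascend.eplb.core.policy.policy_factory import PolicyFactory
from vllm_ascend.eplb.core.policy.policy_abstract import DynamicConfig

config = DynamicConfig()
default_eplb = PolicyFactory.generate_policy(1, config)    # formerly DynamicEplb
swift_balancer = PolicyFactory.generate_policy(2, config)  # formerly DynamicEplbV2
```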
@@ -73,12 +73,8 @@ class EplbWorker:
         new_expert_maps = self.local2global(new_placement)
         self.update_expert_map(new_expert_maps)
 
-        if self.policy_type == 2:
-            update_info = self.compose_expert_update_info_bipartite(
-                new_expert_maps, self.old_expert_maps)
-        else:
-            update_info = self.compose_expert_update_info_greedy(
-                new_expert_maps, self.old_expert_maps)
+        update_info = self.compose_expert_update_info_greedy(
+            new_expert_maps, self.old_expert_maps)
         self.old_expert_maps = new_expert_maps
         logger.info("EPLB Process compute complete")
 
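With the branch above removed, every policy id now flows through `compose_expert_update_info_greedy`. Both compose functions are generators yielding one `(send_info, recv_info, expert_map, layer_id)` tuple per layer; a minimal sketch of draining the generator, with a hypothetical helper standing in for the worker's real weight send/recv dispatch:

```python
# Sketch only: consuming the per-layer update generator.
# transfer_expert_weights is a hypothetical placeholder, not a
# function in this codebase.
for send_info, recv_info, layer_map, layer_id in update_info:
    transfer_expert_weights(layer_id, send_info, recv_info)
```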
@@ -124,112 +120,6 @@ class EplbWorker:
             new_placement[layer_id] = old_placement[layer_id]
             break
 
-    def compose_expert_update_info_bipartite(self, updated_expert_maps_org,
-                                             current_expert_maps_org):
-        # transform torch tensor to numpy array
-        updated_expert_maps = updated_expert_maps_org.clone()
-        current_expert_maps = current_expert_maps_org.clone()
-        updated_expert_maps = np.array(updated_expert_maps)
-        current_expert_maps = np.array(current_expert_maps)
-
-        num_layers = current_expert_maps.shape[0]
-
-        for layer_id in range(num_layers):
-            updated_expert_maps_this_layer = updated_expert_maps[layer_id]
-            current_expert_maps_this_layer = current_expert_maps[layer_id]
-            updated_expert_maps_this_layer_org = updated_expert_maps_org[
-                layer_id]
-
-            from typing import Any
-
-            expert_send_info_this_layer: dict[Any, Any] = {}
-            expert_recv_info_this_layer: dict[Any, Any] = {}
-
-            # Guard Clause: if there is no expert weight update, avoid subsequent processing
-            if (np.equal(updated_expert_maps_this_layer,
-                         current_expert_maps_this_layer)).all():
-                yield (expert_send_info_this_layer,
-                       expert_recv_info_this_layer,
-                       updated_expert_maps_this_layer_org, layer_id)
-
-            # Parse expert_ids each rank needs to receive from other ranks
-            dst_rank_indices, experts_to_recv = np.where(
-                (current_expert_maps_this_layer == -1)
-                & (updated_expert_maps_this_layer != -1))
-
-            # record src ranks for potential transfer
-            src_ranks_set = dict()
-            for idx in range(len(dst_rank_indices)):
-                expert_id = experts_to_recv[idx].item()
-                if expert_id not in src_ranks_set:
-                    src_ranks_set[expert_id] = np.where(
-                        current_expert_maps_this_layer[:, expert_id] != -1)[0]
-
-            # loop until all experts are scheduled
-            while len(dst_rank_indices) > 0:
-                # construct bipartite graph
-                graph_expert_update: nx.Graph = nx.Graph()
-                for idx in range(len(dst_rank_indices)):
-                    dst_rank_id = dst_rank_indices[idx].item()
-                    expert_id = experts_to_recv[idx].item()
-                    # add src ranks
-                    src_rank_ids = src_ranks_set[expert_id]
-                    graph_expert_update.add_nodes_from(src_rank_ids,
-                                                       bipartite=0)
-                    # add dest rank
-                    graph_expert_update.add_node(str(dst_rank_id), bipartite=1)
-                    # add edges
-                    for src_rank_id in src_rank_ids:
-                        graph_expert_update.add_edge(src_rank_id,
-                                                     str(dst_rank_id))
-
-                # graph may not be connected
-                connected_components = list(
-                    nx.connected_components(graph_expert_update))
-                all_matches = {}
-                # matching in this loop
-                for i, component in enumerate(connected_components):
-                    subgraph = graph_expert_update.subgraph(component)
-                    component_matching = nx.bipartite.maximum_matching(
-                        subgraph)
-                    all_matches.update(component_matching)
-
-                for src_rank, dst_rank in all_matches.items():
-                    dst_rank = int(dst_rank)
-                    assert src_rank != dst_rank
-                    if graph_expert_update.nodes[src_rank]['bipartite'] == 0:
-                        # currently not scheduled experts in rank dst_rank
-                        experts_v = experts_to_recv[np.where(
-                            dst_rank_indices == dst_rank)]
-                        # src: src_rank, dest: dst_rank, expert: expert_id
-                        expert_id = np.intersect1d(
-                            experts_v,
-                            np.where(current_expert_maps_this_layer[src_rank]
-                                     != -1))[0]
-
-                        # record send/rcv pairs
-                        if src_rank not in expert_send_info_this_layer:
-                            expert_send_info_this_layer[src_rank] = []
-                        if dst_rank not in expert_recv_info_this_layer:
-                            expert_recv_info_this_layer[dst_rank] = []
-                        expert_send_info_this_layer[src_rank].append(
-                            (dst_rank, expert_id))
-                        expert_recv_info_this_layer[dst_rank].append(
-                            (src_rank, expert_id))
-
-                        remove_index = np.where(
-                            np.logical_and(dst_rank_indices == dst_rank,
-                                           experts_to_recv == expert_id))
-
-                        # update
-                        dst_rank_indices = np.delete(dst_rank_indices,
-                                                     remove_index)
-                        experts_to_recv = np.delete(experts_to_recv,
-                                                    remove_index)
-
-            yield (expert_send_info_this_layer, expert_recv_info_this_layer,
-                   updated_expert_maps_this_layer_org, layer_id)
-
     # TODO: Here only expert weight exchange is considered, need to be extended to cover other weight update cases
     def compose_expert_update_info_greedy(self, updated_expert_maps,
                                           current_expert_maps):
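For context on what is being discarded: the bipartite variant scheduled weight transfers by running one maximum matching per connected component, pairing source ranks that hold an expert with destination ranks that still need it. A self-contained toy reproduction of that core step with networkx (toy rank ids, not real expert maps):

```python
import networkx as nx

# Source ranks (bipartite=0) can send an expert; destination ranks
# (bipartite=1, stringified as in the removed code so the two node
# sets stay disjoint) still need one.
g = nx.Graph()
g.add_nodes_from([0, 1], bipartite=0)
g.add_nodes_from(["2", "3"], bipartite=1)
g.add_edges_from([(0, "2"), (0, "3"), (1, "3")])

# One maximum matching per connected component, mirroring the removed loop.
matches = {}
for component in nx.connected_components(g):
    matches.update(nx.bipartite.maximum_matching(g.subgraph(component)))

# maximum_matching returns each matched pair in both directions, which is
# why the removed code filtered on the node's bipartite attribute.
print(matches)
```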
@@ -24,7 +24,7 @@ class DynamicTable:
     placement_table = None
 
 
-class DynamicEplb(EplbPolicy):
+class DefaultEplb(EplbPolicy):
 
     def __init__(self, config: DynamicConfig):
         super().__init__(config)
@@ -1,8 +1,8 @@
 # Copyright Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
 # Todo: Once https://github.com/vllm-project/vllm/pull/24069 is merged in vllm. Remove this factory.
 from .policy_abstract import DynamicConfig, EplbPolicy
-from .policy_dynamic_ep import DynamicEplb
-from .policy_dynamic_ep_v2 import DynamicEplbV2
+from .policy_default_eplb import DefaultEplb
+from .policy_swift_balancer import SwiftBalanceEplb
 from .policy_flashlb import FlashLB, warm_up
 from .policy_random import RandomLoadBalance
 
@@ -20,9 +20,9 @@ class PolicyFactory:
         0:
         RandomLoadBalance,  # RandomLoadBalance: shuffle last physical expert on NPU 1 and 3
         1:
-        DynamicEplb,  # Dynamic EPLB policy: overall expert replacement based on current moe load
+        DefaultEplb,  # Dynamic EPLB policy: overall expert replacement based on current moe load
         2:
-        DynamicEplbV2,  # Dynamic EPLB policy V2: expert replacement with constrained number of expert shuffle
+        SwiftBalanceEplb,  # Dynamic EPLB policy V2: expert replacement with constrained number of expert shuffle
         3:
         FlashLB,  # FlashLB EPLB policy: expert replacement based on Joint Optimization, Multi-Shot Enhancement and Incremental Adjustment
     }
@@ -62,7 +62,7 @@ class DynamicTable:
     placement_table = None
 
 
-class DynamicEplbV2(EplbPolicy):
+class SwiftBalanceEplb(EplbPolicy):
 
     def __init__(self, config: DynamicConfig):
         super().__init__(config)