[EPLB][Bugfix] policy_swift_balancer bugfix and renaming (#5897)
### What this PR does / why we need it?
1. Rename dynamic_ep to default_eplb.
2. Rename dynamic_ep_v2 to swift_balancer.
3. Discard the function compose_expert_update_info_bipartite.
- vLLM version: v0.13.0
- vLLM main: bde38c11df
Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
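
For reviewers, a minimal sketch of how the renamed policies are resolved through the factory after this change. The module paths and the `generate_policy` entry point are assumptions based on the surrounding vllm-ascend tree, not guaranteed by this diff; the id-to-class mapping itself comes from the factory hunk below.

```python
# Sketch only: selecting the renamed EPLB policies by integer id.
# Module paths and the generate_policy classmethod are assumed here;
# adjust to the real signatures in the tree.
from vllm_ascend.eplb.core.policy.policy_factory import PolicyFactory
from vllm_ascend.eplb.core.policy.policy_abstract import DynamicConfig

config = DynamicConfig()
default_eplb = PolicyFactory.generate_policy(1, config)    # formerly DynamicEplb
swift_balancer = PolicyFactory.generate_policy(2, config)  # formerly DynamicEplbV2
```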
@@ -73,12 +73,8 @@ class EplbWorker:
         new_expert_maps = self.local2global(new_placement)
         self.update_expert_map(new_expert_maps)
 
-        if self.policy_type == 2:
-            update_info = self.compose_expert_update_info_bipartite(
-                new_expert_maps, self.old_expert_maps)
-        else:
-            update_info = self.compose_expert_update_info_greedy(
-                new_expert_maps, self.old_expert_maps)
+        update_info = self.compose_expert_update_info_greedy(
+            new_expert_maps, self.old_expert_maps)
         self.old_expert_maps = new_expert_maps
         logger.info("EPLB Process compute complete")
 
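With the branch above removed, every policy id now flows through `compose_expert_update_info_greedy`. Both compose functions are generators yielding one `(send_info, recv_info, expert_map, layer_id)` tuple per layer; a minimal sketch of draining the generator, with a hypothetical helper standing in for the worker's real weight send/recv dispatch:

```python
# Sketch only: consuming the per-layer update generator.
# transfer_expert_weights is a hypothetical placeholder, not a
# function in this codebase.
for send_info, recv_info, layer_map, layer_id in update_info:
    transfer_expert_weights(layer_id, send_info, recv_info)
```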
@@ -124,112 +120,6 @@ class EplbWorker:
             new_placement[layer_id] = old_placement[layer_id]
             break
 
-    def compose_expert_update_info_bipartite(self, updated_expert_maps_org,
-                                             current_expert_maps_org):
-        # transform torch tensor to numpy array
-        updated_expert_maps = updated_expert_maps_org.clone()
-        current_expert_maps = current_expert_maps_org.clone()
-        updated_expert_maps = np.array(updated_expert_maps)
-        current_expert_maps = np.array(current_expert_maps)
-
-        num_layers = current_expert_maps.shape[0]
-
-        for layer_id in range(num_layers):
-            updated_expert_maps_this_layer = updated_expert_maps[layer_id]
-            current_expert_maps_this_layer = current_expert_maps[layer_id]
-            updated_expert_maps_this_layer_org = updated_expert_maps_org[
-                layer_id]
-
-            from typing import Any
-
-            expert_send_info_this_layer: dict[Any, Any] = {}
-            expert_recv_info_this_layer: dict[Any, Any] = {}
-
-            # Guard Clause: if there is no expert weight update, avoid subsequent processing
-            if (np.equal(updated_expert_maps_this_layer,
-                         current_expert_maps_this_layer)).all():
-                yield (expert_send_info_this_layer,
-                       expert_recv_info_this_layer,
-                       updated_expert_maps_this_layer_org, layer_id)
-
-            # Parse expert_ids each rank needs to receive from other ranks
-            dst_rank_indices, experts_to_recv = np.where(
-                (current_expert_maps_this_layer == -1)
-                & (updated_expert_maps_this_layer != -1))
-
-            # record src ranks for potential transfer
-            src_ranks_set = dict()
-            for idx in range(len(dst_rank_indices)):
-                expert_id = experts_to_recv[idx].item()
-                if expert_id not in src_ranks_set:
-                    src_ranks_set[expert_id] = np.where(
-                        current_expert_maps_this_layer[:, expert_id] != -1)[0]
-
-            # loop until all experts are scheduled
-            while len(dst_rank_indices) > 0:
-                # construct bipartite graph
-                graph_expert_update: nx.Graph = nx.Graph()
-                for idx in range(len(dst_rank_indices)):
-                    dst_rank_id = dst_rank_indices[idx].item()
-                    expert_id = experts_to_recv[idx].item()
-                    # add src ranks
-                    src_rank_ids = src_ranks_set[expert_id]
-                    graph_expert_update.add_nodes_from(src_rank_ids,
-                                                       bipartite=0)
-                    # add dest rank
-                    graph_expert_update.add_node(str(dst_rank_id), bipartite=1)
-                    # add edges
-                    for src_rank_id in src_rank_ids:
-                        graph_expert_update.add_edge(src_rank_id,
-                                                     str(dst_rank_id))
-
-                # graph may not be connected
-                connected_components = list(
-                    nx.connected_components(graph_expert_update))
-                all_matches = {}
-                # matching in this loop
-                for i, component in enumerate(connected_components):
-                    subgraph = graph_expert_update.subgraph(component)
-                    component_matching = nx.bipartite.maximum_matching(
-                        subgraph)
-                    all_matches.update(component_matching)
-
-                for src_rank, dst_rank in all_matches.items():
-                    dst_rank = int(dst_rank)
-                    assert src_rank != dst_rank
-                    if graph_expert_update.nodes[src_rank]['bipartite'] == 0:
-                        # currently not scheduled experts in rank dst_rank
-                        experts_v = experts_to_recv[np.where(
-                            dst_rank_indices == dst_rank)]
-                        # src: src_rank, dest: dst_rank, expert: expert_id
-                        expert_id = np.intersect1d(
-                            experts_v,
-                            np.where(current_expert_maps_this_layer[src_rank]
-                                     != -1))[0]
-
-                        # record send/rcv pairs
-                        if src_rank not in expert_send_info_this_layer:
-                            expert_send_info_this_layer[src_rank] = []
-                        if dst_rank not in expert_recv_info_this_layer:
-                            expert_recv_info_this_layer[dst_rank] = []
-                        expert_send_info_this_layer[src_rank].append(
-                            (dst_rank, expert_id))
-                        expert_recv_info_this_layer[dst_rank].append(
-                            (src_rank, expert_id))
-
-                        remove_index = np.where(
-                            np.logical_and(dst_rank_indices == dst_rank,
-                                           experts_to_recv == expert_id))
-
-                        # update
-                        dst_rank_indices = np.delete(dst_rank_indices,
-                                                     remove_index)
-                        experts_to_recv = np.delete(experts_to_recv,
-                                                    remove_index)
-
-            yield (expert_send_info_this_layer, expert_recv_info_this_layer,
-                   updated_expert_maps_this_layer_org, layer_id)
-
     # TODO: Here only expert weight exchange is considered, need to be extended to cover other weight update cases
     def compose_expert_update_info_greedy(self, updated_expert_maps,
                                           current_expert_maps):
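For context on what is being discarded: the bipartite variant scheduled weight transfers by running one maximum matching per connected component, pairing source ranks that hold an expert with destination ranks that still need it. A self-contained toy reproduction of that core step with networkx (toy rank ids, not real expert maps):

```python
import networkx as nx

# Source ranks (bipartite=0) can send an expert; destination ranks
# (bipartite=1, stringified as in the removed code so the two node
# sets stay disjoint) still need one.
g = nx.Graph()
g.add_nodes_from([0, 1], bipartite=0)
g.add_nodes_from(["2", "3"], bipartite=1)
g.add_edges_from([(0, "2"), (0, "3"), (1, "3")])

# One maximum matching per connected component, mirroring the removed loop.
matches = {}
for component in nx.connected_components(g):
    matches.update(nx.bipartite.maximum_matching(g.subgraph(component)))

# maximum_matching returns each matched pair in both directions, which is
# why the removed code filtered on the node's bipartite attribute.
print(matches)
```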
@@ -24,7 +24,7 @@ class DynamicTable:
     placement_table = None
 
 
-class DynamicEplb(EplbPolicy):
+class DefaultEplb(EplbPolicy):
 
     def __init__(self, config: DynamicConfig):
         super().__init__(config)
@@ -1,8 +1,8 @@
 # Copyright Huawei Technologies Co., Ltd. 2023-2024. All rights reserved.
 # Todo: Once https://github.com/vllm-project/vllm/pull/24069 is merged in vllm. Remove this factory.
 from .policy_abstract import DynamicConfig, EplbPolicy
-from .policy_dynamic_ep import DynamicEplb
-from .policy_dynamic_ep_v2 import DynamicEplbV2
+from .policy_default_eplb import DefaultEplb
+from .policy_swift_balancer import SwiftBalanceEplb
 from .policy_flashlb import FlashLB, warm_up
 from .policy_random import RandomLoadBalance
 
@@ -20,9 +20,9 @@ class PolicyFactory:
         0:
         RandomLoadBalance,  # RandomLoadBalance: shuffle last physical expert on NPU 1 and 3
         1:
-        DynamicEplb,  # Dynamic EPLB policy: overall expert replacement based on current moe load
+        DefaultEplb,  # Dynamic EPLB policy: overall expert replacement based on current moe load
         2:
-        DynamicEplbV2,  # Dynamic EPLB policy V2: expert replacement with constrained number of expert shuffle
+        SwiftBalanceEplb,  # Dynamic EPLB policy V2: expert replacement with constrained number of expert shuffle
         3:
         FlashLB,  # FlashLB EPLB policy: expert replacement based on Joint Optimization, Multi-Shot Enhancement and Incremental Adjustment
     }
@@ -62,7 +62,7 @@ class DynamicTable:
     placement_table = None
 
 
-class DynamicEplbV2(EplbPolicy):
+class SwiftBalanceEplb(EplbPolicy):
 
     def __init__(self, config: DynamicConfig):
         super().__init__(config)