[EPLB] Profiling mode now reports the time spent on EPLB adjustment. (#7001)

### What this PR does / why we need it?
To analyze the overhead of the dynamic EPLB adjustment framework in detail, this PR adds the time consumed by the adjustment to the information printed in profiling mode.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?
![Snipaste_2026-03-05_11-42-28](https://github.com/user-attachments/assets/41c2b82a-5dfa-4e39-8b50-f4649deed30c)

- vLLM version: v0.16.0
- vLLM main: 15d76f74e2

Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
2026-03-05 16:10:57 +08:00
parent 43c8da3574
commit 5a3744c542
2 changed files with 9 additions and 13 deletions
--- a/vllm_ascend/eplb/eplb_updator.py
+++ b/vllm_ascend/eplb/eplb_updator.py
@@ -99,6 +99,9 @@ class EplbUpdator:
        self.eplb_process.planner_q.put(1)

    def forward_before(self):
+        # Batch after eplb process being triggered, get update info provided by eplb process
+        if self.get_update_info_flag():
+            self.update_info_all = self.eplb_process.block_update_q.get()
        if self.update_expert_weight_flag():
            (expert_send_info, expert_recv_info, updated_expert_map, log2phy_map, layer_id) = self.update_info_all.pop(
                0
@@ -117,11 +120,6 @@ class EplbUpdator:
            self.reqs = []
            self.eplb_loader.asyn_expert_weight_transfer(self.reqs)

-    def take_update_info_from_eplb_process(self):
-        # Batch after eplb process being triggered, get update info provided by eplb process
-        if self.get_update_info_flag():
-            self.update_info_all = self.eplb_process.block_update_q.get()
-
    def forward_end(self):
        if self.wakeup_eplb_worker_flag():
            self.compute_and_set_moe_load()