[Refactor] Modify the binding logic to allocate CPU cores for each NPU card (#5555)

[Refactor] Modify the binding logic to allocate CPU cores for each NPU card

### What this PR does / why we need it?
Modify the binding logic to allocate CPU cores for each NPU card based on NUMA affinity, while isolating acl_thread/release_thread and other processes to prevent mutual interference.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
c85cc045f8

Signed-off-by: rowzwel_dx <1392851715@qq.com>

- vLLM version: v0.13.0
- vLLM main: 7157596103

Signed-off-by: Rozwel-dx <1392851715@qq.com>
2026-01-13 09:21:28 +08:00
parent d886b81971
commit 8d571286dd
3 changed files with 470 additions and 316 deletions
--- a/vllm_ascend/worker/worker.py
+++ b/vllm_ascend/worker/worker.py
@@ -115,17 +115,6 @@ class NPUWorker(WorkerBase):
                         distributed_init_method=distributed_init_method,
                         is_driver_worker=is_driver_worker)

-        # binding cpu
-        if get_ascend_config().enable_cpu_binding:
-            try:
-                bind_cpus(self.local_rank, ratio=1.0)
-            except RuntimeError as e:
-                logger.error(f"{e} in {self.local_rank}")
-            except ValueError as e:
-                logger.error(f"{e} in {self.local_rank}")
-            except Exception:
-                logger.info("Skip binding cpu.")
-
        if self.cache_config.cache_dtype == "auto":
            self.cache_dtype = self.model_config.dtype
        else:
@@ -238,6 +227,15 @@ class NPUWorker(WorkerBase):
        set_random_seed(self.model_config.seed)
        # Initialize device properties used by triton kernels.
        init_device_properties_triton()
+
+        # binding cpu
+        if get_ascend_config().enable_cpu_binding:
+            try:
+                bind_cpus(self.local_rank)
+            except Exception as e:
+                logger.warning(
+                    f"Bind cpus failed in rank{self.local_rank}: {e} Skip binding cpu."
+                )
        return device

    def init_device(self):