[Platform] Enable ARM-only CPU binding with NUMA-balanced A3 policy and update docs/tests (#6686)
### What this PR does / why we need it?
- Keeps enable_cpu_binding default on, but skips binding on non‑ARM CPUs inside bind_cpus, with a clear log.
- Uses a table-driven binding policy: A3 uses NUMA‑balanced binding; other device types use NUMA‑affinity binding.
- Updates docs to reflect the exact behavior and adds/updates unit tests for the new logic.

### Does this PR introduce _any_ user-facing change?
- Yes. CPU binding is now enabled by default via additional_config, and documented in the user guide.
- CPU binding behavior differs by device type (A3 vs. others).

### How was this patch tested?
Added/updated unit tests:

test_cpu_binding.py
1. test_binding_mode_table covers A2 vs A3 binding mode mapping.
2. test_build_cpu_pools_fallback_to_numa_balanced covers fallback when affinity info is missing.
3. TestBindingSwitch.test_is_arm_cpu covers ARM/x86/unknown arch detection.
4. test_bind_cpus_skip_non_arm covers non‑ARM skip path in bind_cpus.

test_worker_v1.py
1. Updated mocks for enable_cpu_binding default True to align with new config default.

- vLLM version: v0.14.1
- vLLM main: d7de043

---------

Signed-off-by: chenchuw886 <chenchuw@huawei.com>
Co-authored-by: chenchuw886 <chenchuw@huawei.com>
This commit is contained in:
@@ -1,16 +1,29 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from collections import defaultdict
|
||||
|
||||
import psutil
|
||||
from vllm.logger import logger
|
||||
|
||||
from vllm_ascend.utils import AscendDeviceType, get_ascend_device_type
|
||||
|
||||
ALLOWED_CPUS_PATH = "/proc/self/status"
|
||||
ASCEND_RT_VISIBLE_DEVICES = os.getenv("ASCEND_RT_VISIBLE_DEVICES")
|
||||
|
||||
|
||||
def is_arm_cpu(arch: "str | None" = None) -> bool:
    """Report whether a machine string names an ARM-family CPU.

    Args:
        arch: Machine string to classify. When omitted (or empty), the value
            returned by ``platform.machine()`` is used. Matching is
            case-insensitive.

    Returns:
        True when the string starts with ``arm`` or ``aarch64`` (this covers
        ``arm64``, ``armv8l`` and big-endian ``aarch64_be``). False for the
        listed x86 names and for any unrecognized string; an unrecognized
        string also logs a warning.
    """
    arch = (arch or platform.machine()).lower()
    if arch in {"x86_64", "amd64", "i386", "i686"}:
        return False
    # Prefix match so that variants such as "armv8l" and "aarch64_be" are
    # accepted instead of being reported as an unknown architecture.
    if arch.startswith(("arm", "aarch64")):
        return True
    logger.warning("Unknown CPU architecture '%s', CPU binding will be disabled.", arch)
    return False
|
||||
|
||||
|
||||
def execute_command(cmd: list[str]) -> tuple[str, int]:
|
||||
with subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p:
|
||||
out, _ = p.communicate(timeout=1000)
|
||||
@@ -77,7 +90,7 @@ class DeviceInfo:
|
||||
devices_list = [int(x) for x in devices_str.split(",")]
|
||||
running_npu_set = set(devices_list) & running_npu_set
|
||||
if not running_npu_set:
|
||||
raise RuntimeError("Can not get running npu info, you can use BIND_CPU=0 to skip.")
|
||||
raise RuntimeError("Can not get running npu info.")
|
||||
return sorted(running_npu_set)
|
||||
|
||||
def parse_allowed_cpus(self) -> list[int]:
|
||||
@@ -202,7 +215,7 @@ class CpuAlloc:
|
||||
npu_num_this_node = min(npu_num_per_node, num_running_npu - index)
|
||||
if npu_num_this_node <= 0:
|
||||
break
|
||||
# Evenly distribute the CPUs of this NUMA node among npu_num_this_node NPUs.
|
||||
# NUMA-balanced distribute the CPUs of this NUMA node among npu_num_this_node NPUs.
|
||||
total_cpu_num = len(cpus)
|
||||
base_cpu_num = total_cpu_num // npu_num_this_node
|
||||
extra_cpu_num = total_cpu_num % npu_num_this_node
|
||||
@@ -217,9 +230,22 @@ class CpuAlloc:
|
||||
index += 1
|
||||
start_index = end_index
|
||||
|
||||
# Binding mode per device type; device types not listed here use "affinity".
DEVICE_BINDING_MODE = {
    AscendDeviceType.A3: "numa_balanced",
}

@classmethod
def _binding_mode(cls) -> str:
    """Return the binding mode name for the current Ascend device type.

    The device type comes from ``get_ascend_device_type()`` and is looked up
    in ``DEVICE_BINDING_MODE``; a device type with no entry yields
    ``"affinity"``.
    """
    return cls.DEVICE_BINDING_MODE.get(get_ascend_device_type(), "affinity")
|
||||
|
||||
def build_cpu_pools(self) -> None:
|
||||
self.build_cpu_node_map()
|
||||
if self._binding_mode() == "numa_balanced":
|
||||
self.handle_no_affinity()
|
||||
return
|
||||
if not self.device_info.npu_affinity:
|
||||
logger.warning("NPU affinity info not found, fallback to NUMA-balanced CPU binding.")
|
||||
self.handle_no_affinity()
|
||||
return
|
||||
for npu in self.device_info.running_npu_list:
|
||||
@@ -282,5 +308,8 @@ class CpuAlloc:
|
||||
|
||||
|
||||
def bind_cpus(rank_id: int) -> None:
    """Run CPU binding for one rank, but only when ``is_arm_cpu()`` is true.

    Args:
        rank_id: Rank identifier handed to ``CpuAlloc``.

    When ``is_arm_cpu()`` returns False, nothing is bound and an info
    message is logged instead.
    """
    if is_arm_cpu():
        CpuAlloc(rank_id).run_all()
        return
    logger.info("CPU binding skipped: non-ARM CPU detected.")
|
||||
|
||||
Reference in New Issue
Block a user