[Platform] Enable ARM-only CPU binding with NUMA-balanced A3 policy and update docs/tests (#6686)

### What this PR does / why we need it?

- Keeps `enable_cpu_binding` on by default, but skips binding on non‑ARM CPUs inside `bind_cpus`, with a clear log message.
- Uses a table-driven binding policy: A3 uses NUMA‑balanced binding; other device types use NUMA‑affinity binding.
- Updates docs to reflect the exact behavior and adds/updates unit tests for the new logic.

### Does this PR introduce _any_ user-facing change?

- Yes. CPU binding is now enabled by default via `additional_config`, and is documented in the user guide.
- CPU binding behavior differs by device type (A3 vs. others).

### How was this patch tested?

Added/updated unit tests:

test_cpu_binding.py

1. `test_binding_mode_table` covers the A2 vs. A3 binding mode mapping.
2. `test_build_cpu_pools_fallback_to_numa_balanced` covers the fallback when affinity info is missing.
3. `TestBindingSwitch.test_is_arm_cpu` covers ARM/x86/unknown architecture detection.
4. `test_bind_cpus_skip_non_arm` covers the non‑ARM skip path in `bind_cpus`.

test_worker_v1.py

1. Updated mocks to set `enable_cpu_binding` to True, aligning with the new config default.

- vLLM version: v0.14.1
- vLLM main: d7de043

---------

Signed-off-by: chenchuw886 <chenchuw@huawei.com>
Co-authored-by: chenchuw886 <chenchuw@huawei.com>
2026-02-25 11:15:14 +08:00
parent ac9a7d1301
commit 3da2ba22eb
6 changed files with 80 additions and 9 deletions
--- a/tests/ut/device_allocator/test_cpu_binding.py
+++ b/tests/ut/device_allocator/test_cpu_binding.py
@@ -1,7 +1,8 @@
 import unittest
 from unittest.mock import patch

-from vllm_ascend.cpu_binding import CpuAlloc, DeviceInfo
+from vllm_ascend.cpu_binding import CpuAlloc, DeviceInfo, bind_cpus, is_arm_cpu
+from vllm_ascend.utils import AscendDeviceType


 class TestDeviceInfo(unittest.TestCase):
@@ -103,6 +104,23 @@ class TestCpuAlloc(unittest.TestCase):
            2: [8, 9, 10, 11, 12, 13]
        })

+    @patch('vllm_ascend.cpu_binding.get_ascend_device_type')
+    def test_binding_mode_table(self, mock_get_device_type):
+        mock_get_device_type.return_value = AscendDeviceType.A2
+        self.assertEqual(self.cpu_alloc._binding_mode(), "affinity")
+        mock_get_device_type.return_value = AscendDeviceType.A3
+        self.assertEqual(self.cpu_alloc._binding_mode(), "numa_balanced")
+
+    @patch('vllm_ascend.cpu_binding.get_ascend_device_type')
+    def test_build_cpu_pools_fallback_to_numa_balanced(self, mock_get_device_type):
+        mock_get_device_type.return_value = AscendDeviceType.A2
+        self.cpu_alloc.device_info.npu_affinity = {}
+        with patch.object(self.cpu_alloc, "build_cpu_node_map") as mock_build_cpu_node_map, \
+                patch.object(self.cpu_alloc, "handle_no_affinity") as mock_handle_no_affinity:
+            self.cpu_alloc.build_cpu_pools()
+        mock_build_cpu_node_map.assert_called_once()
+        mock_handle_no_affinity.assert_called_once()
+
    def test_extend_numa(self):
        result = self.cpu_alloc.extend_numa([])
        self.assertEqual(result, [])
@@ -128,8 +146,10 @@ class TestCpuAlloc(unittest.TestCase):
        self.assertEqual(self.cpu_alloc.numa_to_cpu_map,
                         expected_numa_to_cpu_map)

+    @patch('vllm_ascend.cpu_binding.get_ascend_device_type')
    @patch('vllm_ascend.cpu_binding.execute_command')
-    def test_handle_no_affinity(self, mock_execute_command):
+    def test_handle_no_affinity(self, mock_execute_command, mock_get_device_type):
+        mock_get_device_type.return_value = AscendDeviceType.A3
        mock_execute_command.side_effect = [("0 0\n1 1", 0), ("0 0\n1 1", 0)]
        self.cpu_alloc.device_info.running_npu_list = [0, 1]
        self.cpu_alloc.device_info.allowed_cpus = [0, 1, 2, 3]
@@ -163,5 +183,26 @@ class TestCpuAlloc(unittest.TestCase):
        mock_execute_command.assert_called()


+class TestBindingSwitch(unittest.TestCase):
+
+    @patch('vllm_ascend.cpu_binding.platform.machine')
+    def test_is_arm_cpu(self, mock_machine):
+        mock_machine.return_value = "x86_64"
+        self.assertFalse(is_arm_cpu())
+        mock_machine.return_value = "aarch64"
+        self.assertTrue(is_arm_cpu())
+        mock_machine.return_value = "armv8"
+        self.assertTrue(is_arm_cpu())
+        mock_machine.return_value = "mips64"
+        self.assertFalse(is_arm_cpu())
+
+    @patch('vllm_ascend.cpu_binding.CpuAlloc')
+    @patch('vllm_ascend.cpu_binding.is_arm_cpu')
+    def test_bind_cpus_skip_non_arm(self, mock_is_arm_cpu, mock_cpu_alloc):
+        mock_is_arm_cpu.return_value = False
+        bind_cpus(0)
+        mock_cpu_alloc.assert_not_called()
+
+
 if __name__ == '__main__':
    unittest.main()
--- a/tests/ut/worker/test_worker_v1.py
+++ b/tests/ut/worker/test_worker_v1.py
@@ -70,7 +70,7 @@ class TestNPUWorker(TestBase):
        # Setup mock behavior
        mock_ops.register_dummy_fusion_op.return_value = None
        mock_ascend_config = MagicMock()
-        mock_ascend_config.enable_cpu_binding = False
+        mock_ascend_config.enable_cpu_binding = True
        mock_get_ascend_config.return_value = mock_ascend_config

        # Import and create NPUWorker instance
@@ -125,7 +125,7 @@ class TestNPUWorker(TestBase):
        self.model_config_mock.trust_remote_code = True
        mock_ops.register_dummy_fusion_op.return_value = None
        mock_ascend_config = MagicMock()
-        mock_ascend_config.enable_cpu_binding = False
+        mock_ascend_config.enable_cpu_binding = True
        mock_get_ascend_config.return_value = mock_ascend_config

        # Create NPUWorker instance
@@ -168,7 +168,7 @@ class TestNPUWorker(TestBase):
        self.cache_config_mock.cache_dtype = "float32"
        mock_ops.register_dummy_fusion_op.return_value = None
        mock_ascend_config = MagicMock()
-        mock_ascend_config.enable_cpu_binding = False
+        mock_ascend_config.enable_cpu_binding = True
        mock_get_ascend_config.return_value = mock_ascend_config

        # Create NPUWorker instance