[Refactor] Cleanup platform (#5566)
### What this PR does / why we need it?
1. add `COMPILATION_PASS_KEY` constant
2. clean up useless platform interface `empty_cache`, `synchronize`,
`mem_get_info`, `clear_npu_memory`
3. rename `CUSTOM_OP_REGISTERED` to `_CUSTOM_OP_REGISTERED`
4. remove useless env `VLLM_ENABLE_CUDAGRAPH_GC`
NPUPlatform is the interface called by vLLM. Do not call it from inside
vllm-ascend.
### Does this PR introduce _any_ user-facing change?
This PR is just a cleanup. All CI should pass.
### How was this patch tested?
- vLLM version: v0.13.0
- vLLM main:
7157596103
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -120,115 +120,6 @@ class TestNPUPlatform(TestBase):
|
||||
self.assertIsNone(self.platform.inference_mode())
|
||||
mock_inference_mode.assert_called_once()
|
||||
|
||||
@patch("torch.npu.set_device")
|
||||
def test_set_device_normal(self, mock_set_device):
|
||||
device = torch.device("npu:0")
|
||||
self.platform.set_device(device)
|
||||
mock_set_device.assert_called_once_with(device)
|
||||
|
||||
@patch("torch.npu.set_device",
|
||||
side_effect=RuntimeError("Device not available"))
|
||||
def test_set_device_failure(self, mock_set_device):
|
||||
device = torch.device("npu:0")
|
||||
with self.assertRaises(RuntimeError):
|
||||
self.platform.set_device(device)
|
||||
mock_set_device.assert_called_once_with(device)
|
||||
|
||||
@patch("torch.npu.empty_cache")
|
||||
def test_empty_cache_normal(self, mock_empty_cache):
|
||||
self.platform.empty_cache()
|
||||
mock_empty_cache.assert_called_once()
|
||||
|
||||
@patch("torch.npu.empty_cache",
|
||||
side_effect=RuntimeError("Cache clearing failed"))
|
||||
def test_empty_cache_failure(self, mock_empty_cache):
|
||||
with self.assertRaises(RuntimeError):
|
||||
self.platform.empty_cache()
|
||||
mock_empty_cache.assert_called_once()
|
||||
|
||||
@patch("torch.npu.synchronize")
|
||||
def test_synchronize_normal(self, mock_synchronize):
|
||||
self.platform.synchronize()
|
||||
mock_synchronize.assert_called_once()
|
||||
|
||||
@patch("torch.npu.synchronize",
|
||||
side_effect=RuntimeError("Synchronization failed"))
|
||||
def test_synchronize_failure(self, mock_synchronize):
|
||||
with self.assertRaises(RuntimeError):
|
||||
self.platform.synchronize()
|
||||
mock_synchronize.assert_called_once()
|
||||
|
||||
@patch("torch.npu.mem_get_info")
|
||||
def test_mem_get_info_normal(self, mock_mem_get_info):
|
||||
free_memory_size = 1024
|
||||
total_memory_size = 2048
|
||||
memory_info = (free_memory_size, total_memory_size)
|
||||
mock_mem_get_info.return_value = memory_info
|
||||
result = self.platform.mem_get_info()
|
||||
self.assertIsInstance(result, tuple)
|
||||
self.assertEqual(len(result), 2)
|
||||
self.assertEqual(result, memory_info)
|
||||
mock_mem_get_info.assert_called_once()
|
||||
|
||||
@patch("torch.npu.mem_get_info",
|
||||
side_effect=RuntimeError("NPU not available"))
|
||||
def test_mem_get_info_failure(self, mock_mem_get_info):
|
||||
with self.assertRaises(RuntimeError):
|
||||
self.platform.mem_get_info()
|
||||
mock_mem_get_info.assert_called_once()
|
||||
|
||||
@patch("gc.collect")
|
||||
@patch("torch.npu.empty_cache")
|
||||
@patch("torch.npu.reset_peak_memory_stats")
|
||||
def test_clear_npu_memory_normal(self, mock_reset_stats, mock_empty_cache,
|
||||
mock_gc_collect):
|
||||
self.platform.clear_npu_memory()
|
||||
|
||||
mock_gc_collect.assert_called_once()
|
||||
mock_empty_cache.assert_called_once()
|
||||
mock_reset_stats.assert_called_once()
|
||||
|
||||
@patch("gc.collect", side_effect=Exception("GC failed"))
|
||||
@patch("torch.npu.empty_cache")
|
||||
@patch("torch.npu.reset_peak_memory_stats")
|
||||
def test_clear_npu_memory_gc_collect_failure(self, mock_reset_stats,
|
||||
mock_empty_cache,
|
||||
mock_gc_collect):
|
||||
with self.assertRaises(Exception):
|
||||
self.platform.clear_npu_memory()
|
||||
|
||||
mock_gc_collect.assert_called_once()
|
||||
mock_empty_cache.assert_not_called()
|
||||
mock_reset_stats.assert_not_called()
|
||||
|
||||
@patch("gc.collect")
|
||||
@patch("torch.npu.empty_cache",
|
||||
side_effect=RuntimeError("Cache clear failed"))
|
||||
@patch("torch.npu.reset_peak_memory_stats")
|
||||
def test_clear_npu_memory_empty_cache_failure(self, mock_reset_stats,
|
||||
mock_empty_cache,
|
||||
mock_gc_collect):
|
||||
with self.assertRaises(RuntimeError):
|
||||
self.platform.clear_npu_memory()
|
||||
|
||||
mock_gc_collect.assert_called_once()
|
||||
mock_empty_cache.assert_called_once()
|
||||
mock_reset_stats.assert_not_called()
|
||||
|
||||
@patch("gc.collect")
|
||||
@patch("torch.npu.empty_cache")
|
||||
@patch("torch.npu.reset_peak_memory_stats",
|
||||
side_effect=RuntimeError("Reset failed"))
|
||||
def test_clear_npu_memory_reset_stats_failure(self, mock_reset_stats,
|
||||
mock_empty_cache,
|
||||
mock_gc_collect):
|
||||
with self.assertRaises(RuntimeError):
|
||||
self.platform.clear_npu_memory()
|
||||
|
||||
mock_gc_collect.assert_called_once()
|
||||
mock_empty_cache.assert_called_once()
|
||||
mock_reset_stats.assert_called_once()
|
||||
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
@patch("vllm_ascend.utils.update_aclgraph_sizes")
|
||||
@patch('vllm_ascend.utils.get_ascend_device_type',
|
||||
|
||||
Reference in New Issue
Block a user