[Refactor] Cleanup platform (#5566)

### What this PR does / why we need it? 1. add `COMPILATION_PASS_KEY` constant 2. clean up useless platform interfaces `empty_cache`, `synchronize`, `mem_get_info`, `clear_npu_memory` 3. rename `CUSTOM_OP_REGISTERED` to `_CUSTOM_OP_REGISTERED` 4. remove useless env `VLLM_ENABLE_CUDAGRAPH_GC` NPUPlatform is the interface called by vLLM. Do not call it from inside vllm-ascend. ### Does this PR introduce _any_ user-facing change? This PR is just a cleanup. All CI should pass. ### How was this patch tested? - vLLM version: v0.13.0 - vLLM main: 7157596103 Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2026-01-07 09:25:55 +08:00
parent 6ea2afe5fa
commit 1112208052
9 changed files with 79 additions and 217 deletions
--- a/vllm_ascend/device_allocator/camem.py
+++ b/vllm_ascend/device_allocator/camem.py
@@ -25,8 +25,6 @@ import torch
 from acl.rt import memcpy  # type: ignore # noqa: F401
 from vllm.logger import logger

-from vllm_ascend.platform import NPUPlatform
-

 def find_loaded_library(lib_name) -> Optional[str]:
    """
@@ -196,11 +194,10 @@ class CaMemAllocator:
            handle = data.handle
            if data.tag in offload_tags:
                size_in_bytes = handle[1]
-                cpu_backup_tensor = torch.empty(
-                    size_in_bytes,
-                    dtype=torch.uint8,
-                    device='cpu',
-                    pin_memory=NPUPlatform.is_pin_memory_available())
+                cpu_backup_tensor = torch.empty(size_in_bytes,
+                                                dtype=torch.uint8,
+                                                device='cpu',
+                                                pin_memory=True)
                cpu_ptr = cpu_backup_tensor.data_ptr()
                ACL_MEMCPY_DEVICE_TO_HOST = 2
                dest_max = cpu_ptr + size_in_bytes * 2