[Refactor] Cleanup platform (#5566)

### What this PR does / why we need it?
1. Add the `COMPILATION_PASS_KEY` constant.
2. Clean up the unused platform interfaces `empty_cache`, `synchronize`,
`mem_get_info`, and `clear_npu_memory`.
3. Rename `CUSTOM_OP_REGISTERED` to `_CUSTOM_OP_REGISTERED`.
4. Remove the unused env `VLLM_ENABLE_CUDAGRAPH_GC`.

`NPUPlatform` is the interface called by vLLM. Do not call it from inside
vllm-ascend; use `torch.npu` directly instead, as in the sketch below.
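
For example, internal vllm-ascend code that used to go through the removed wrappers should call `torch.npu` directly. A minimal sketch (the before/after call sites are illustrative, not lifted from this diff):

```python
import gc

import torch

# Before: routed through the platform interface from inside vllm-ascend.
# NPUPlatform.clear_npu_memory()

# After: call torch.npu directly; NPUPlatform stays a vLLM-facing interface.
gc.collect()
torch.npu.empty_cache()
torch.npu.reset_peak_memory_stats()

# Likewise for the other removed wrappers:
free_bytes, total_bytes = torch.npu.mem_get_info()
torch.npu.synchronize()
```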

### Does this PR introduce _any_ user-facing change?
No. This PR is just a cleanup. All CI should pass.

### How was this patch tested?

- vLLM version: v0.13.0
- vLLM main: 7157596103

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Author: wangxiyuan
Date: 2026-01-07 09:25:55 +08:00
Commit: 1112208052 (parent 6ea2afe5fa)
9 changed files with 79 additions and 217 deletions


@@ -15,9 +15,8 @@
 # This file is a part of the vllm-ascend project.
 #
-import gc
 import os
-from typing import TYPE_CHECKING, Optional, Tuple
+from typing import TYPE_CHECKING, Optional
 from uuid import uuid4
 
 import torch
@@ -26,18 +25,16 @@ from vllm.platforms import Platform, PlatformEnum
 
 # todo: please remove it when solve cuda hard code in vllm
 os.environ["VLLM_DISABLE_SHARED_EXPERTS_STREAM"] = "1"
-# todo: please remove it when support controls garbage collection during CUDA graph capture.
-os.environ["VLLM_ENABLE_CUDAGRAPH_GC"] = "1"
 
 from vllm_ascend.ascend_config import init_ascend_config
 from vllm_ascend.utils import refresh_block_size
 
 # isort: off
 from vllm_ascend.utils import (
-    ASCEND_QUANTIZATION_METHOD, COMPRESSED_TENSORS_METHOD, AscendDeviceType,
-    enable_sp, get_ascend_device_type, is_vl_model, update_aclgraph_sizes,
-    update_cudagraph_capture_sizes, update_default_aclgraph_sizes,
-    check_kv_extra_config)
+    ASCEND_QUANTIZATION_METHOD, COMPRESSED_TENSORS_METHOD,
+    COMPILATION_PASS_KEY, AscendDeviceType, enable_sp, get_ascend_device_type,
+    is_vl_model, update_aclgraph_sizes, update_cudagraph_capture_sizes,
+    update_default_aclgraph_sizes, check_kv_extra_config)
 
 if TYPE_CHECKING:
     from vllm.config import ModelConfig, VllmConfig
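
The two removed lines above forced `VLLM_ENABLE_CUDAGRAPH_GC` on at import time. Anyone who still wants the old behavior can set it themselves; a hedged sketch (whether the running vLLM version still reads this variable is an assumption):

```python
import os

# Set before launching vLLM so the flag is visible when CUDA graph capture
# runs. This restores the default that the PR stops forcing.
os.environ["VLLM_ENABLE_CUDAGRAPH_GC"] = "1"
```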
@@ -47,7 +44,7 @@ else:
     VllmConfig = None
     FlexibleArgumentParser = None
 
-CUSTOM_OP_REGISTERED = False
+_CUSTOM_OP_REGISTERED = False
 
 
 class NPUPlatform(Platform):
@@ -74,7 +71,7 @@ class NPUPlatform(Platform):
         It is a parameter of inductor_config used to register custom passes.
         Currently, we only use Inductor's 'pattern matcher' functionality, so we define our own pass_key.
         """
-        return "graph_fusion_manager"
+        return COMPILATION_PASS_KEY
 
     @classmethod
     def get_pass_manager_cls(cls) -> str:
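
This hunk replaces the hard-coded pass key with the new shared constant. Judging from the literal it replaces, the definition added to `vllm_ascend/utils.py` is presumably just (a sketch, not the verbatim source):

```python
# vllm_ascend/utils.py (sketch): single source of truth for the
# inductor_config key under which custom compilation passes are registered.
COMPILATION_PASS_KEY = "graph_fusion_manager"
```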
@@ -131,24 +128,6 @@ class NPUPlatform(Platform):
     def set_device(cls, device: torch.device):
         torch.npu.set_device(device)
 
-    @classmethod
-    def empty_cache(cls):
-        torch.npu.empty_cache()
-
-    @classmethod
-    def synchronize(cls):
-        torch.npu.synchronize()
-
-    @classmethod
-    def mem_get_info(cls) -> Tuple[int, int]:
-        return torch.npu.mem_get_info()
-
-    @classmethod
-    def clear_npu_memory(cls):
-        gc.collect()
-        torch.npu.empty_cache()
-        torch.npu.reset_peak_memory_stats()
-
     @classmethod
     def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         # initialize ascend config from vllm additional_config
@@ -351,8 +330,8 @@ class NPUPlatform(Platform):
         # from vllm_ascend.utils import enable_custom_op
         # enable_custom_op()
         # set custom ops path
-        global CUSTOM_OP_REGISTERED
-        if CUSTOM_OP_REGISTERED:
+        global _CUSTOM_OP_REGISTERED
+        if _CUSTOM_OP_REGISTERED:
             return
         CUR_DIR = os.path.dirname(os.path.realpath(__file__))
         CUSTOM_OPP_PATH = os.path.join(CUR_DIR, "_cann_ops_custom", "vendors",
@@ -365,7 +344,7 @@ class NPUPlatform(Platform):
                 "ASCEND_CUSTOM_OPP_PATH"] = f"{CUSTOM_OPP_PATH}:{current_cust_opp_path}"
         else:
             os.environ["ASCEND_CUSTOM_OPP_PATH"] = CUSTOM_OPP_PATH
-        CUSTOM_OP_REGISTERED = True
+        _CUSTOM_OP_REGISTERED = True
 
     @classmethod
     def get_attn_backend_cls(cls, selected_backend, attn_selector_config):
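
Finally, the underscore rename above is cosmetic but meaningful: `_CUSTOM_OP_REGISTERED` is a module-private idempotency guard. The pattern in isolation, as a self-contained sketch (the `register_custom_opp_path` helper is hypothetical, not part of the diff):

```python
import os

_REGISTERED = False  # module-private guard; leading underscore signals internal use


def register_custom_opp_path(opp_path: str) -> None:
    """Prepend opp_path to ASCEND_CUSTOM_OPP_PATH exactly once per process."""
    global _REGISTERED
    if _REGISTERED:
        return
    current = os.environ.get("ASCEND_CUSTOM_OPP_PATH")
    os.environ["ASCEND_CUSTOM_OPP_PATH"] = (
        f"{opp_path}:{current}" if current else opp_path)
    _REGISTERED = True
```

Repeated calls after the first are no-ops, which is exactly what the platform code relies on.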