[Refactor] Cleanup platform (#5566)
### What this PR does / why we need it?
1. Add a `COMPILATION_PASS_KEY` constant.
2. Clean up the unused platform interface methods `empty_cache`, `synchronize`, `mem_get_info`, and `clear_npu_memory`.
3. Rename `CUSTOM_OP_REGISTERED` to `_CUSTOM_OP_REGISTERED`.
4. Remove the useless env setting `VLLM_ENABLE_CUDAGRAPH_GC`.

`NPUPlatform` is the interface called by vLLM; do not call it from inside vllm-ascend.
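A minimal sketch of that call boundary (assuming Ascend is the active platform, so vLLM's `current_platform` resolves to `NPUPlatform`; the direct `torch.npu` call mirrors the `set_device` wrapper visible in the diff below):

```python
import torch
import torch_npu  # noqa: F401  # patches torch so that torch.npu is available

from vllm.platforms import current_platform  # NPUPlatform on Ascend

# vLLM side: dispatch through the Platform interface.
current_platform.set_device(torch.device("npu:0"))

# vllm-ascend side: call the backend directly instead of going
# through NPUPlatform.
torch.npu.set_device(torch.device("npu:0"))
```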
### Does this PR introduce _any_ user-facing change?
This PR is just a cleanup. All CI should pass.
### How was this patch tested?
- vLLM version: v0.13.0
- vLLM main: 7157596103
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
```diff
@@ -15,9 +15,8 @@
 # This file is a part of the vllm-ascend project.
 #
 
-import gc
 import os
-from typing import TYPE_CHECKING, Optional, Tuple
+from typing import TYPE_CHECKING, Optional
 from uuid import uuid4
 
 import torch
```
```diff
@@ -26,18 +25,16 @@ from vllm.platforms import Platform, PlatformEnum
 
 # todo: please remove it when solve cuda hard code in vllm
 os.environ["VLLM_DISABLE_SHARED_EXPERTS_STREAM"] = "1"
-# todo: please remove it when support controls garbage collection during CUDA graph capture.
-os.environ["VLLM_ENABLE_CUDAGRAPH_GC"] = "1"
 
 from vllm_ascend.ascend_config import init_ascend_config
 from vllm_ascend.utils import refresh_block_size
 
 # isort: off
 from vllm_ascend.utils import (
-    ASCEND_QUANTIZATION_METHOD, COMPRESSED_TENSORS_METHOD, AscendDeviceType,
-    enable_sp, get_ascend_device_type, is_vl_model, update_aclgraph_sizes,
-    update_cudagraph_capture_sizes, update_default_aclgraph_sizes,
-    check_kv_extra_config)
+    ASCEND_QUANTIZATION_METHOD, COMPRESSED_TENSORS_METHOD,
+    COMPILATION_PASS_KEY, AscendDeviceType, enable_sp, get_ascend_device_type,
+    is_vl_model, update_aclgraph_sizes, update_cudagraph_capture_sizes,
+    update_default_aclgraph_sizes, check_kv_extra_config)
 
 if TYPE_CHECKING:
     from vllm.config import ModelConfig, VllmConfig
```
```diff
@@ -47,7 +44,7 @@ else:
     VllmConfig = None
     FlexibleArgumentParser = None
 
-CUSTOM_OP_REGISTERED = False
+_CUSTOM_OP_REGISTERED = False
 
 
 class NPUPlatform(Platform):
```
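The leading underscore marks the flag as private to this module, matching how it is used: as a one-shot guard around custom-op registration. A minimal sketch of the idiom (the helper name `_register_once` is hypothetical; the real guard lives inside `NPUPlatform`, shown in the later hunks):

```python
_CUSTOM_OP_REGISTERED = False  # module-private: not part of the public API

def _register_once() -> None:
    """Run registration side effects at most once per process."""
    global _CUSTOM_OP_REGISTERED
    if _CUSTOM_OP_REGISTERED:
        return
    ...  # e.g. export ASCEND_CUSTOM_OPP_PATH, as in the real code
    _CUSTOM_OP_REGISTERED = True
```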
```diff
@@ -74,7 +71,7 @@ class NPUPlatform(Platform):
         It is a parameter of inductor_config used to register custom passes.
         Currently, we only use Inductor's 'pattern matcher' functionality, so we define our own pass_key.
         """
-        return "graph_fusion_manager"
+        return COMPILATION_PASS_KEY

     @classmethod
     def get_pass_manager_cls(cls) -> str:
```
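Per the new import list, `COMPILATION_PASS_KEY` comes from `vllm_ascend.utils`; presumably it keeps the previous inline literal as its value, roughly:

```python
# In vllm_ascend/utils.py (sketch: the value matches the literal this hunk
# replaces; the definition site is inferred from the updated import list).
COMPILATION_PASS_KEY = "graph_fusion_manager"
```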
```diff
@@ -131,24 +128,6 @@ class NPUPlatform(Platform):
     def set_device(cls, device: torch.device):
         torch.npu.set_device(device)
 
-    @classmethod
-    def empty_cache(cls):
-        torch.npu.empty_cache()
-
-    @classmethod
-    def synchronize(cls):
-        torch.npu.synchronize()
-
-    @classmethod
-    def mem_get_info(cls) -> Tuple[int, int]:
-        return torch.npu.mem_get_info()
-
-    @classmethod
-    def clear_npu_memory(cls):
-        gc.collect()
-        torch.npu.empty_cache()
-        torch.npu.reset_peak_memory_stats()
-
     @classmethod
     def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         # initialize ascend config from vllm additional_config
```
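Since these were thin wrappers, call sites inside vllm-ascend can inline the `torch.npu` calls directly. A sketch of what replaces a `clear_npu_memory()` call (the helper name is hypothetical; the body is exactly the removed wrapper's):

```python
import gc

import torch
import torch_npu  # noqa: F401  # makes torch.npu available

def clear_device_memory() -> None:
    """Inline replacement for the removed NPUPlatform.clear_npu_memory()."""
    gc.collect()                         # drop dead Python references first
    torch.npu.empty_cache()              # release cached NPU allocations
    torch.npu.reset_peak_memory_stats()  # restart peak-memory tracking
```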
```diff
@@ -351,8 +330,8 @@ class NPUPlatform(Platform):
         # from vllm_ascend.utils import enable_custom_op
         # enable_custom_op()
         # set custom ops path
-        global CUSTOM_OP_REGISTERED
-        if CUSTOM_OP_REGISTERED:
+        global _CUSTOM_OP_REGISTERED
+        if _CUSTOM_OP_REGISTERED:
             return
         CUR_DIR = os.path.dirname(os.path.realpath(__file__))
         CUSTOM_OPP_PATH = os.path.join(CUR_DIR, "_cann_ops_custom", "vendors",
```
```diff
@@ -365,7 +344,7 @@ class NPUPlatform(Platform):
                 "ASCEND_CUSTOM_OPP_PATH"] = f"{CUSTOM_OPP_PATH}:{current_cust_opp_path}"
         else:
             os.environ["ASCEND_CUSTOM_OPP_PATH"] = CUSTOM_OPP_PATH
-        CUSTOM_OP_REGISTERED = True
+        _CUSTOM_OP_REGISTERED = True
 
     @classmethod
     def get_attn_backend_cls(cls, selected_backend, attn_selector_config):
```
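The surrounding logic follows a common env-path pattern: prepend the bundled op path if `ASCEND_CUSTOM_OPP_PATH` is already set, otherwise set it. A self-contained sketch (the function name is hypothetical; `custom_opp_path` stands in for the `CUSTOM_OPP_PATH` computed from the package directory in the real code):

```python
import os

def export_custom_opp_path(custom_opp_path: str) -> None:
    """Expose bundled custom ops via ASCEND_CUSTOM_OPP_PATH."""
    existing = os.environ.get("ASCEND_CUSTOM_OPP_PATH", "")
    if existing:
        # Prepend so the bundled ops take precedence over prior entries.
        os.environ["ASCEND_CUSTOM_OPP_PATH"] = f"{custom_opp_path}:{existing}"
    else:
        os.environ["ASCEND_CUSTOM_OPP_PATH"] = custom_opp_path
```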
||||