Remove COMPILE_CUSTOM_KERNELS env (#4864)
With more and more custom ops merged, disabling `COMPILE_CUSTOM_KERNELS`
for vLLM Ascend seems useless now. Let's enable csrc compilation by default.
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -36,12 +36,6 @@ env_variables: Dict[str, Callable[[], Any]] = {
|
||||
# Release, Debug, RelWithDebugInfo. If not set, the default value is Release.
|
||||
"CMAKE_BUILD_TYPE":
|
||||
lambda: os.getenv("CMAKE_BUILD_TYPE"),
|
||||
# Whether to compile custom kernels. If not set, the default value is True.
|
||||
# If set to False, the custom kernels will not be compiled. Please note that
|
||||
# the sleep mode feature will be disabled as well if custom kernels are not
|
||||
# compiled.
|
||||
"COMPILE_CUSTOM_KERNELS":
|
||||
lambda: bool(int(os.getenv("COMPILE_CUSTOM_KERNELS", "1"))),
|
||||
# The CXX compiler used for compiling the package. If not set, the default
|
||||
# value is None, which means the system default CXX compiler will be used.
|
||||
"CXX_COMPILER":
|
||||
|
||||
@@ -49,7 +49,6 @@ ACL_FORMAT_FRACTAL_ND = 2
|
||||
# Ascend ACL tensor-format identifier (numeric value defined by the CANN/ACL
# runtime; 29 denotes the FRACTAL_NZ layout).
ACL_FORMAT_FRACTAL_NZ = 29

# Lazily-initialized module-level caches; ``None`` means "not determined yet".
_CUSTOM_OP_ENABLED = None
# Populated on first call to sleep_mode_enabled() from the generated
# _build_info module.
_SLEEP_MODE_ENABLED = None
# NOTE(review): the three stream globals below are presumably cached NPU
# stream handles created on first use — confirm against their accessor
# functions, which are not visible in this chunk.
_CURRENT_STREAM = None
_PREFETCH_STREAM = None
_SHARED_EXPERTS_CALCULATION_STREAM = None
|
||||
@@ -125,14 +124,6 @@ def is_enable_nz():
|
||||
return envs_ascend.VLLM_ASCEND_ENABLE_NZ
|
||||
|
||||
|
||||
def sleep_mode_enabled():
    """Return True if vllm-ascend was compiled with sleep-mode support.

    The flag is read once from the build-time-generated ``_build_info``
    module and memoized in the module-level ``_SLEEP_MODE_ENABLED`` global,
    so the import cost is paid only on the first call.
    """
    global _SLEEP_MODE_ENABLED
    if _SLEEP_MODE_ENABLED is not None:
        return _SLEEP_MODE_ENABLED
    from vllm_ascend import _build_info  # type: ignore
    _SLEEP_MODE_ENABLED = _build_info.__sleep_mode_enabled__
    return _SLEEP_MODE_ENABLED
|
||||
|
||||
|
||||
def _round_up(x: int, align: int):
|
||||
# round up x to align, for example, if align is 16, x will be rounded up to 16, 32, 48, etc.
|
||||
# input: 15, 16 -> output: 16
|
||||
|
||||
@@ -54,7 +54,7 @@ from vllm_ascend.ops.triton.triton_utils import init_device_properties_triton
|
||||
from vllm_ascend.platform import NPUPlatform
|
||||
from vllm_ascend.utils import (check_ascend_device_type, enable_sp,
|
||||
is_enable_nz, register_ascend_customop,
|
||||
sleep_mode_enabled, try_register_lib)
|
||||
try_register_lib)
|
||||
from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
|
||||
|
||||
torch._dynamo.trace_rules.clear_lru_cache() # noqa: E402
|
||||
@@ -129,7 +129,7 @@ class NPUWorker(WorkerBase):
|
||||
init_cached_hf_modules()
|
||||
|
||||
self.profiler = self._init_profiler()
|
||||
if sleep_mode_enabled():
|
||||
if vllm_config.model_config and vllm_config.model_config.enable_sleep_mode:
|
||||
# Buffers saved before sleep
|
||||
self._sleep_saved_buffers: dict[str, torch.Tensor] = {}
|
||||
|
||||
@@ -140,10 +140,6 @@ class NPUWorker(WorkerBase):
|
||||
WEIGHT_LOADER_V2_SUPPORTED.remove("UnquantizedLinearMethod")
|
||||
|
||||
def sleep(self, level: int = 1) -> None:
|
||||
if not sleep_mode_enabled():
|
||||
raise ValueError(
|
||||
"Sleep mode is not enabled. Please compile vllm-ascend with COMPILE_CUSTOM_KERNELS=1."
|
||||
)
|
||||
free_bytes_before_sleep = NPUPlatform.mem_get_info()[0]
|
||||
# Save the buffers before level 2 sleep
|
||||
if level == 2:
|
||||
@@ -164,11 +160,6 @@ class NPUWorker(WorkerBase):
|
||||
used_bytes / GiB_bytes)
|
||||
|
||||
def wake_up(self, tags: Optional[list[str]] = None) -> None:
|
||||
if not sleep_mode_enabled():
|
||||
raise ValueError(
|
||||
"Sleep mode is not enabled. Please compile vllm-ascend with COMPILE_CUSTOM_KERNELS=1."
|
||||
)
|
||||
|
||||
if is_enable_nz():
|
||||
raise ValueError(
|
||||
"FRACTAL_NZ mode is enabled. This may cause model parameter precision issues "
|
||||
|
||||
Reference in New Issue
Block a user