Remove mindie_turbo (#4896)
mindie_turbo has been out of date for a long time. This PR removes the related register method.
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -112,27 +112,6 @@ class TestUtils(TestBase):
|
||||
output_tensor = utils.aligned_16(input_tensor)
|
||||
self.assertEqual(output_tensor.shape[0], 32)
|
||||
|
||||
@mock.patch('importlib.util.find_spec')
@mock.patch('importlib.import_module')
def test_try_register_lib(self, mock_import_module, mock_find_spec):
    """try_register_lib must never raise, whatever the import outcome."""
    # Case 1: the library is found and imports cleanly.
    mock_find_spec.return_value = mock.MagicMock()
    mock_import_module.return_value = mock.MagicMock()
    utils.try_register_lib("existing_lib",
                           "Library found and imported successfully")

    # Case 2: the library is absent (find_spec yields None).
    mock_find_spec.return_value = None
    utils.try_register_lib("non_existing_lib")

    # Case 3: the library is present but importing it fails.
    mock_find_spec.return_value = mock.MagicMock()
    mock_import_module.side_effect = ImportError("import error")
    utils.try_register_lib("error_lib")
|
||||
|
||||
def test_enable_custom_op(self):
    """enable_custom_op is expected to report success on this platform."""
    self.assertTrue(utils.enable_custom_op())
|
||||
|
||||
@@ -52,14 +52,12 @@ class TestNPUWorker(TestBase):
|
||||
@patch("vllm_ascend.worker.worker_v1.get_ascend_config")
|
||||
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
|
||||
@patch("vllm_ascend.worker.worker_v1.check_ascend_device_type")
|
||||
@patch("vllm_ascend.worker.worker_v1.try_register_lib")
|
||||
@patch(init_cached_hf_modules_path)
|
||||
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
|
||||
def test_init_npu_worker_normal_case(
|
||||
self,
|
||||
mock_init_profiler,
|
||||
mock_init_cached_hf_modules,
|
||||
mock_try_register_lib,
|
||||
mock_check_ascend_device_type,
|
||||
mock_init_ascend_config,
|
||||
mock_get_ascend_config,
|
||||
@@ -94,12 +92,6 @@ class TestNPUWorker(TestBase):
|
||||
mock_init_ascend_config.assert_called_once_with(self.vllm_config_mock)
|
||||
mock_check_ascend_device_type.assert_called_once()
|
||||
|
||||
# Verify try_register_lib call
|
||||
mock_try_register_lib.assert_called_once_with(
|
||||
"mindie_turbo",
|
||||
"MindIE Turbo is installed. vLLM inference will be accelerated with MindIE Turbo.",
|
||||
)
|
||||
|
||||
# Verify cache_dtype setting
|
||||
self.assertEqual(worker.cache_dtype, torch.float16)
|
||||
mock_init_profiler.assert_called_once()
|
||||
@@ -114,14 +106,12 @@ class TestNPUWorker(TestBase):
|
||||
@patch("vllm_ascend.worker.worker_v1.get_ascend_config")
|
||||
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
|
||||
@patch("vllm_ascend.worker.worker_v1.check_ascend_device_type")
|
||||
@patch("vllm_ascend.worker.worker_v1.try_register_lib")
|
||||
@patch(init_cached_hf_modules_path)
|
||||
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
|
||||
def test_init_npu_worker_with_trust_remote_code(
|
||||
self,
|
||||
mock_init_profiler,
|
||||
mock_init_cached_hf_modules,
|
||||
mock_try_register_lib,
|
||||
mock_check_ascend_device_type,
|
||||
mock_init_ascend_config,
|
||||
mock_get_ascend_config,
|
||||
@@ -159,14 +149,12 @@ class TestNPUWorker(TestBase):
|
||||
@patch("vllm_ascend.worker.worker_v1.get_ascend_config")
|
||||
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
|
||||
@patch("vllm_ascend.worker.worker_v1.check_ascend_device_type")
|
||||
@patch("vllm_ascend.worker.worker_v1.try_register_lib")
|
||||
@patch(init_cached_hf_modules_path)
|
||||
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
|
||||
def test_init_npu_worker_with_custom_cache_dtype(
|
||||
self,
|
||||
mock_init_profiler,
|
||||
mock_init_cached_hf_modules,
|
||||
mock_try_register_lib,
|
||||
mock_check_ascend_device_type,
|
||||
mock_init_ascend_config,
|
||||
mock_get_ascend_config,
|
||||
|
||||
@@ -219,19 +219,6 @@ def aligned_16(tensor: torch.Tensor):
|
||||
return new_tensor
|
||||
|
||||
|
||||
def try_register_lib(lib_name: str, lib_info: str = ""):
    """Best-effort import of an optional library.

    Looks up *lib_name* on the import path and, if a spec is found,
    imports the module. When the import succeeds and *lib_info* is
    non-empty, the message is logged at INFO level. Any failure
    (missing module, broken install, import error) is swallowed so an
    optional dependency can never abort startup.

    Args:
        lib_name: Importable module name of the optional library.
        lib_info: Optional message logged after a successful import.
    """
    import importlib
    import importlib.util

    try:
        # Guard clause: nothing to do when the module cannot be located.
        if importlib.util.find_spec(lib_name) is None:
            return
        importlib.import_module(lib_name)
        if lib_info:
            logger.info(lib_info)
    except Exception:
        # Deliberately best-effort, but leave a trace for debugging
        # instead of failing completely silently.
        logger.debug("Optional lib %s could not be registered.",
                     lib_name, exc_info=True)
|
||||
|
||||
|
||||
def enable_custom_op():
|
||||
"""
|
||||
Enable lazy init for vllm_ascend_C to avoid early initialization of CANN's RTS component.
|
||||
|
||||
@@ -53,8 +53,7 @@ from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
|
||||
from vllm_ascend.ops.triton.triton_utils import init_device_properties_triton
|
||||
from vllm_ascend.platform import NPUPlatform
|
||||
from vllm_ascend.utils import (check_ascend_device_type, enable_sp,
|
||||
is_enable_nz, register_ascend_customop,
|
||||
try_register_lib)
|
||||
is_enable_nz, register_ascend_customop)
|
||||
from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
|
||||
|
||||
torch._dynamo.trace_rules.clear_lru_cache() # noqa: E402
|
||||
@@ -111,11 +110,6 @@ class NPUWorker(WorkerBase):
|
||||
except Exception:
|
||||
logger.info("Skip binding cpu.")
|
||||
|
||||
# Try to import mindie_turbo to accelerate vLLM inference.
|
||||
try_register_lib(
|
||||
"mindie_turbo",
|
||||
"MindIE Turbo is installed. vLLM inference will be accelerated with MindIE Turbo."
|
||||
)
|
||||
if self.cache_config.cache_dtype == "auto":
|
||||
self.cache_dtype = self.model_config.dtype
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user