Remove mindie_turbo (#4896)

mindie_turbo has been out of date for a long time. This PR removes the related register method.

- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-12-11 18:46:12 +08:00
committed by GitHub
parent b89763f1ed
commit 06a66939cd
4 changed files with 1 additions and 53 deletions

View File

@@ -112,27 +112,6 @@ class TestUtils(TestBase):
output_tensor = utils.aligned_16(input_tensor)
self.assertEqual(output_tensor.shape[0], 32)
@mock.patch('importlib.util.find_spec')
@mock.patch('importlib.import_module')
def test_try_register_lib(self, mock_import_module, mock_find_spec):
# import OK
mock_find_spec.return_value = mock.MagicMock()
mock_import_module.return_value = mock.MagicMock()
lib_name = "existing_lib"
lib_info = "Library found and imported successfully"
utils.try_register_lib(lib_name, lib_info)
# Can't find lib
mock_find_spec.return_value = None
lib_name = "non_existing_lib"
utils.try_register_lib(lib_name)
# import error
mock_find_spec.return_value = mock.MagicMock()
mock_import_module.side_effect = ImportError("import error")
lib_name = "error_lib"
utils.try_register_lib(lib_name)
def test_enable_custom_op(self):
result = utils.enable_custom_op()
self.assertTrue(result)

View File

@@ -52,14 +52,12 @@ class TestNPUWorker(TestBase):
@patch("vllm_ascend.worker.worker_v1.get_ascend_config")
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
@patch("vllm_ascend.worker.worker_v1.check_ascend_device_type")
@patch("vllm_ascend.worker.worker_v1.try_register_lib")
@patch(init_cached_hf_modules_path)
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
def test_init_npu_worker_normal_case(
self,
mock_init_profiler,
mock_init_cached_hf_modules,
mock_try_register_lib,
mock_check_ascend_device_type,
mock_init_ascend_config,
mock_get_ascend_config,
@@ -94,12 +92,6 @@ class TestNPUWorker(TestBase):
mock_init_ascend_config.assert_called_once_with(self.vllm_config_mock)
mock_check_ascend_device_type.assert_called_once()
# Verify try_register_lib call
mock_try_register_lib.assert_called_once_with(
"mindie_turbo",
"MindIE Turbo is installed. vLLM inference will be accelerated with MindIE Turbo.",
)
# Verify cache_dtype setting
self.assertEqual(worker.cache_dtype, torch.float16)
mock_init_profiler.assert_called_once()
@@ -114,14 +106,12 @@ class TestNPUWorker(TestBase):
@patch("vllm_ascend.worker.worker_v1.get_ascend_config")
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
@patch("vllm_ascend.worker.worker_v1.check_ascend_device_type")
@patch("vllm_ascend.worker.worker_v1.try_register_lib")
@patch(init_cached_hf_modules_path)
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
def test_init_npu_worker_with_trust_remote_code(
self,
mock_init_profiler,
mock_init_cached_hf_modules,
mock_try_register_lib,
mock_check_ascend_device_type,
mock_init_ascend_config,
mock_get_ascend_config,
@@ -159,14 +149,12 @@ class TestNPUWorker(TestBase):
@patch("vllm_ascend.worker.worker_v1.get_ascend_config")
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
@patch("vllm_ascend.worker.worker_v1.check_ascend_device_type")
@patch("vllm_ascend.worker.worker_v1.try_register_lib")
@patch(init_cached_hf_modules_path)
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
def test_init_npu_worker_with_custom_cache_dtype(
self,
mock_init_profiler,
mock_init_cached_hf_modules,
mock_try_register_lib,
mock_check_ascend_device_type,
mock_init_ascend_config,
mock_get_ascend_config,

View File

@@ -219,19 +219,6 @@ def aligned_16(tensor: torch.Tensor):
return new_tensor
def try_register_lib(lib_name: str, lib_info: str = ""):
import importlib
import importlib.util
try:
module_spec = importlib.util.find_spec(lib_name)
if module_spec is not None:
importlib.import_module(lib_name)
if lib_info:
logger.info(lib_info)
except Exception:
pass
def enable_custom_op():
"""
Enable lazy init for vllm_ascend_C to avoid early initialization of CANN's RTS component.

View File

@@ -53,8 +53,7 @@ from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
from vllm_ascend.ops.triton.triton_utils import init_device_properties_triton
from vllm_ascend.platform import NPUPlatform
from vllm_ascend.utils import (check_ascend_device_type, enable_sp,
is_enable_nz, register_ascend_customop,
try_register_lib)
is_enable_nz, register_ascend_customop)
from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
torch._dynamo.trace_rules.clear_lru_cache() # noqa: E402
@@ -111,11 +110,6 @@ class NPUWorker(WorkerBase):
except Exception:
logger.info("Skip binding cpu.")
# Try to import mindie_turbo to accelerate vLLM inference.
try_register_lib(
"mindie_turbo",
"MindIE Turbo is installed. vLLM inference will be accelerated with MindIE Turbo."
)
if self.cache_config.cache_dtype == "auto":
self.cache_dtype = self.model_config.dtype
else: