Remove mindie_turbo (#4896)
mindie_turbo has been out of date for a long time. This PR removes the related register method.
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -112,27 +112,6 @@ class TestUtils(TestBase):
|
|||||||
output_tensor = utils.aligned_16(input_tensor)
|
output_tensor = utils.aligned_16(input_tensor)
|
||||||
self.assertEqual(output_tensor.shape[0], 32)
|
self.assertEqual(output_tensor.shape[0], 32)
|
||||||
|
|
||||||
@mock.patch('importlib.util.find_spec')
|
|
||||||
@mock.patch('importlib.import_module')
|
|
||||||
def test_try_register_lib(self, mock_import_module, mock_find_spec):
|
|
||||||
# import OK
|
|
||||||
mock_find_spec.return_value = mock.MagicMock()
|
|
||||||
mock_import_module.return_value = mock.MagicMock()
|
|
||||||
lib_name = "existing_lib"
|
|
||||||
lib_info = "Library found and imported successfully"
|
|
||||||
utils.try_register_lib(lib_name, lib_info)
|
|
||||||
|
|
||||||
# Can't find lib
|
|
||||||
mock_find_spec.return_value = None
|
|
||||||
lib_name = "non_existing_lib"
|
|
||||||
utils.try_register_lib(lib_name)
|
|
||||||
|
|
||||||
# import error
|
|
||||||
mock_find_spec.return_value = mock.MagicMock()
|
|
||||||
mock_import_module.side_effect = ImportError("import error")
|
|
||||||
lib_name = "error_lib"
|
|
||||||
utils.try_register_lib(lib_name)
|
|
||||||
|
|
||||||
def test_enable_custom_op(self):
|
def test_enable_custom_op(self):
|
||||||
result = utils.enable_custom_op()
|
result = utils.enable_custom_op()
|
||||||
self.assertTrue(result)
|
self.assertTrue(result)
|
||||||
|
|||||||
@@ -52,14 +52,12 @@ class TestNPUWorker(TestBase):
|
|||||||
@patch("vllm_ascend.worker.worker_v1.get_ascend_config")
|
@patch("vllm_ascend.worker.worker_v1.get_ascend_config")
|
||||||
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
|
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
|
||||||
@patch("vllm_ascend.worker.worker_v1.check_ascend_device_type")
|
@patch("vllm_ascend.worker.worker_v1.check_ascend_device_type")
|
||||||
@patch("vllm_ascend.worker.worker_v1.try_register_lib")
|
|
||||||
@patch(init_cached_hf_modules_path)
|
@patch(init_cached_hf_modules_path)
|
||||||
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
|
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
|
||||||
def test_init_npu_worker_normal_case(
|
def test_init_npu_worker_normal_case(
|
||||||
self,
|
self,
|
||||||
mock_init_profiler,
|
mock_init_profiler,
|
||||||
mock_init_cached_hf_modules,
|
mock_init_cached_hf_modules,
|
||||||
mock_try_register_lib,
|
|
||||||
mock_check_ascend_device_type,
|
mock_check_ascend_device_type,
|
||||||
mock_init_ascend_config,
|
mock_init_ascend_config,
|
||||||
mock_get_ascend_config,
|
mock_get_ascend_config,
|
||||||
@@ -94,12 +92,6 @@ class TestNPUWorker(TestBase):
|
|||||||
mock_init_ascend_config.assert_called_once_with(self.vllm_config_mock)
|
mock_init_ascend_config.assert_called_once_with(self.vllm_config_mock)
|
||||||
mock_check_ascend_device_type.assert_called_once()
|
mock_check_ascend_device_type.assert_called_once()
|
||||||
|
|
||||||
# Verify try_register_lib call
|
|
||||||
mock_try_register_lib.assert_called_once_with(
|
|
||||||
"mindie_turbo",
|
|
||||||
"MindIE Turbo is installed. vLLM inference will be accelerated with MindIE Turbo.",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Verify cache_dtype setting
|
# Verify cache_dtype setting
|
||||||
self.assertEqual(worker.cache_dtype, torch.float16)
|
self.assertEqual(worker.cache_dtype, torch.float16)
|
||||||
mock_init_profiler.assert_called_once()
|
mock_init_profiler.assert_called_once()
|
||||||
@@ -114,14 +106,12 @@ class TestNPUWorker(TestBase):
|
|||||||
@patch("vllm_ascend.worker.worker_v1.get_ascend_config")
|
@patch("vllm_ascend.worker.worker_v1.get_ascend_config")
|
||||||
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
|
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
|
||||||
@patch("vllm_ascend.worker.worker_v1.check_ascend_device_type")
|
@patch("vllm_ascend.worker.worker_v1.check_ascend_device_type")
|
||||||
@patch("vllm_ascend.worker.worker_v1.try_register_lib")
|
|
||||||
@patch(init_cached_hf_modules_path)
|
@patch(init_cached_hf_modules_path)
|
||||||
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
|
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
|
||||||
def test_init_npu_worker_with_trust_remote_code(
|
def test_init_npu_worker_with_trust_remote_code(
|
||||||
self,
|
self,
|
||||||
mock_init_profiler,
|
mock_init_profiler,
|
||||||
mock_init_cached_hf_modules,
|
mock_init_cached_hf_modules,
|
||||||
mock_try_register_lib,
|
|
||||||
mock_check_ascend_device_type,
|
mock_check_ascend_device_type,
|
||||||
mock_init_ascend_config,
|
mock_init_ascend_config,
|
||||||
mock_get_ascend_config,
|
mock_get_ascend_config,
|
||||||
@@ -159,14 +149,12 @@ class TestNPUWorker(TestBase):
|
|||||||
@patch("vllm_ascend.worker.worker_v1.get_ascend_config")
|
@patch("vllm_ascend.worker.worker_v1.get_ascend_config")
|
||||||
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
|
@patch("vllm_ascend.worker.worker_v1.init_ascend_config")
|
||||||
@patch("vllm_ascend.worker.worker_v1.check_ascend_device_type")
|
@patch("vllm_ascend.worker.worker_v1.check_ascend_device_type")
|
||||||
@patch("vllm_ascend.worker.worker_v1.try_register_lib")
|
|
||||||
@patch(init_cached_hf_modules_path)
|
@patch(init_cached_hf_modules_path)
|
||||||
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
|
@patch("vllm_ascend.worker.worker_v1.NPUWorker._init_profiler")
|
||||||
def test_init_npu_worker_with_custom_cache_dtype(
|
def test_init_npu_worker_with_custom_cache_dtype(
|
||||||
self,
|
self,
|
||||||
mock_init_profiler,
|
mock_init_profiler,
|
||||||
mock_init_cached_hf_modules,
|
mock_init_cached_hf_modules,
|
||||||
mock_try_register_lib,
|
|
||||||
mock_check_ascend_device_type,
|
mock_check_ascend_device_type,
|
||||||
mock_init_ascend_config,
|
mock_init_ascend_config,
|
||||||
mock_get_ascend_config,
|
mock_get_ascend_config,
|
||||||
|
|||||||
@@ -219,19 +219,6 @@ def aligned_16(tensor: torch.Tensor):
|
|||||||
return new_tensor
|
return new_tensor
|
||||||
|
|
||||||
|
|
||||||
def try_register_lib(lib_name: str, lib_info: str = ""):
|
|
||||||
import importlib
|
|
||||||
import importlib.util
|
|
||||||
try:
|
|
||||||
module_spec = importlib.util.find_spec(lib_name)
|
|
||||||
if module_spec is not None:
|
|
||||||
importlib.import_module(lib_name)
|
|
||||||
if lib_info:
|
|
||||||
logger.info(lib_info)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def enable_custom_op():
|
def enable_custom_op():
|
||||||
"""
|
"""
|
||||||
Enable lazy init for vllm_ascend_C to avoid early initialization of CANN's RTS component.
|
Enable lazy init for vllm_ascend_C to avoid early initialization of CANN's RTS component.
|
||||||
|
|||||||
@@ -53,8 +53,7 @@ from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
|
|||||||
from vllm_ascend.ops.triton.triton_utils import init_device_properties_triton
|
from vllm_ascend.ops.triton.triton_utils import init_device_properties_triton
|
||||||
from vllm_ascend.platform import NPUPlatform
|
from vllm_ascend.platform import NPUPlatform
|
||||||
from vllm_ascend.utils import (check_ascend_device_type, enable_sp,
|
from vllm_ascend.utils import (check_ascend_device_type, enable_sp,
|
||||||
is_enable_nz, register_ascend_customop,
|
is_enable_nz, register_ascend_customop)
|
||||||
try_register_lib)
|
|
||||||
from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
|
from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
|
||||||
|
|
||||||
torch._dynamo.trace_rules.clear_lru_cache() # noqa: E402
|
torch._dynamo.trace_rules.clear_lru_cache() # noqa: E402
|
||||||
@@ -111,11 +110,6 @@ class NPUWorker(WorkerBase):
|
|||||||
except Exception:
|
except Exception:
|
||||||
logger.info("Skip binding cpu.")
|
logger.info("Skip binding cpu.")
|
||||||
|
|
||||||
# Try to import mindie_turbo to accelerate vLLM inference.
|
|
||||||
try_register_lib(
|
|
||||||
"mindie_turbo",
|
|
||||||
"MindIE Turbo is installed. vLLM inference will be accelerated with MindIE Turbo."
|
|
||||||
)
|
|
||||||
if self.cache_config.cache_dtype == "auto":
|
if self.cache_config.cache_dtype == "auto":
|
||||||
self.cache_dtype = self.model_config.dtype
|
self.cache_dtype = self.model_config.dtype
|
||||||
else:
|
else:
|
||||||
|
|||||||
Reference in New Issue
Block a user