[Misc] Remove VLLM_USE_V1 usage in code (#1764)
We plan to remove V0 code from this version. The first step is to delete
v0 usage.
Related: https://github.com/vllm-project/vllm-ascend/issues/1620
- vLLM version: v0.9.2
- vLLM main:
61e20828da
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -193,71 +193,48 @@ class TestAscendConfig(TestBase):
|
||||
@_clean_up_ascend_config
|
||||
def test_check_ascend_config_wrong_case(self):
|
||||
test_vllm_config = VllmConfig()
|
||||
# For V0 engine
|
||||
with mock.patch.dict(os.environ, {"VLLM_USE_V1": "0"}):
|
||||
with self.assertRaises(NotImplementedError):
|
||||
test_vllm_config.additional_config = {
|
||||
"torchair_graph_config": {
|
||||
"enabled": True,
|
||||
},
|
||||
"refresh": True
|
||||
}
|
||||
init_ascend_config(test_vllm_config)
|
||||
check_ascend_config(test_vllm_config, False)
|
||||
with self.assertRaises(NotImplementedError):
|
||||
test_vllm_config.additional_config = {
|
||||
"ascend_scheduler_config": {
|
||||
"enabled": True,
|
||||
},
|
||||
"refresh": True
|
||||
}
|
||||
init_ascend_config(test_vllm_config)
|
||||
check_ascend_config(test_vllm_config, True)
|
||||
# For V1 engine
|
||||
with mock.patch.dict(os.environ, {"VLLM_USE_V1": "1"}):
|
||||
# torchair + eager mode
|
||||
with self.assertRaises(RuntimeError):
|
||||
test_vllm_config.additional_config = {
|
||||
"torchair_graph_config": {
|
||||
"enabled": True,
|
||||
},
|
||||
"refresh": True
|
||||
}
|
||||
init_ascend_config(test_vllm_config)
|
||||
enforce_eager = True
|
||||
check_ascend_config(test_vllm_config, enforce_eager)
|
||||
# torchair + non deepseek model
|
||||
with self.assertRaises(NotImplementedError):
|
||||
test_vllm_config.additional_config = {
|
||||
"torchair_graph_config": {
|
||||
"enabled": True,
|
||||
},
|
||||
"refresh": True
|
||||
}
|
||||
model_path = os.path.join(os.path.dirname(__file__),
|
||||
"fake_weight")
|
||||
fake_model_config = ModelConfig(model=model_path)
|
||||
fake_model_config.hf_config = PretrainedConfig()
|
||||
fake_model_config.hf_config.model_type = "llama"
|
||||
test_vllm_config.model_config = fake_model_config
|
||||
init_ascend_config(test_vllm_config)
|
||||
check_ascend_config(test_vllm_config, False)
|
||||
# aclgraph + deepseek model
|
||||
with self.assertRaises(NotImplementedError):
|
||||
test_vllm_config.additional_config = {
|
||||
"torchair_graph_config": {
|
||||
"enabled": False,
|
||||
},
|
||||
"refresh": True
|
||||
}
|
||||
model_path = os.path.join(os.path.dirname(__file__),
|
||||
"fake_weight")
|
||||
fake_model_config = ModelConfig(model=model_path)
|
||||
fake_model_config.hf_config = PretrainedConfig()
|
||||
fake_model_config.hf_config.model_type = "deepseek"
|
||||
test_vllm_config.model_config = fake_model_config
|
||||
init_ascend_config(test_vllm_config)
|
||||
check_ascend_config(test_vllm_config, False)
|
||||
|
||||
# torchair + eager mode
|
||||
with self.assertRaises(RuntimeError):
|
||||
test_vllm_config.additional_config = {
|
||||
"torchair_graph_config": {
|
||||
"enabled": True,
|
||||
},
|
||||
"refresh": True
|
||||
}
|
||||
init_ascend_config(test_vllm_config)
|
||||
enforce_eager = True
|
||||
check_ascend_config(test_vllm_config, enforce_eager)
|
||||
# torchair + non deepseek model
|
||||
with self.assertRaises(NotImplementedError):
|
||||
test_vllm_config.additional_config = {
|
||||
"torchair_graph_config": {
|
||||
"enabled": True,
|
||||
},
|
||||
"refresh": True
|
||||
}
|
||||
model_path = os.path.join(os.path.dirname(__file__), "fake_weight")
|
||||
fake_model_config = ModelConfig(model=model_path)
|
||||
fake_model_config.hf_config = PretrainedConfig()
|
||||
fake_model_config.hf_config.model_type = "llama"
|
||||
test_vllm_config.model_config = fake_model_config
|
||||
init_ascend_config(test_vllm_config)
|
||||
check_ascend_config(test_vllm_config, False)
|
||||
# aclgraph + deepseek model
|
||||
with self.assertRaises(NotImplementedError):
|
||||
test_vllm_config.additional_config = {
|
||||
"torchair_graph_config": {
|
||||
"enabled": False,
|
||||
},
|
||||
"refresh": True
|
||||
}
|
||||
model_path = os.path.join(os.path.dirname(__file__), "fake_weight")
|
||||
fake_model_config = ModelConfig(model=model_path)
|
||||
fake_model_config.hf_config = PretrainedConfig()
|
||||
fake_model_config.hf_config.model_type = "deepseek"
|
||||
test_vllm_config.model_config = fake_model_config
|
||||
init_ascend_config(test_vllm_config)
|
||||
check_ascend_config(test_vllm_config, False)
|
||||
|
||||
def test_check_torchair_supported(self):
|
||||
test_cases = [('deepseek_v3', True), ('PanguProMoE', True),
|
||||
|
||||
@@ -389,69 +389,6 @@ class TestNPUPlatform(TestBase):
|
||||
"vllm_ascend.worker.worker_v1.NPUWorker",
|
||||
)
|
||||
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
@patch("vllm.envs.VLLM_USE_V1", False)
|
||||
def test_check_and_update_config_speculative_worker_config(
|
||||
self, mock_init_ascend, mock_check_ascend):
|
||||
mock_init_ascend.return_value = self.mock_ascend_config
|
||||
self.mock_vllm_config.speculative_config = MagicMock()
|
||||
self.mock_vllm_config.speculative_config.disable_logprobs = True
|
||||
self.mock_vllm_config.parallel_config.worker_cls = "auto"
|
||||
|
||||
with patch.dict("os.environ", {}):
|
||||
from vllm_ascend import platform
|
||||
|
||||
importlib.reload(platform)
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
import os
|
||||
|
||||
self.assertEqual(os.environ.get("ACL_OP_INIT_MODE"), "1")
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.parallel_config.worker_cls,
|
||||
"vllm.spec_decode.spec_decode_worker.create_spec_worker",
|
||||
)
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.parallel_config.sd_worker_cls,
|
||||
"vllm_ascend.worker.worker.NPUWorker",
|
||||
)
|
||||
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
@patch("vllm.envs.VLLM_USE_V1", False)
|
||||
def test_check_and_update_config_multi_step_worker_config(
|
||||
self, mock_init_ascend, mock_check_ascend):
|
||||
mock_init_ascend.return_value = self.mock_ascend_config
|
||||
self.mock_vllm_config.scheduler_config.is_multi_step = True
|
||||
self.mock_vllm_config.parallel_config.worker_cls = "auto"
|
||||
|
||||
from vllm_ascend import platform
|
||||
|
||||
importlib.reload(platform)
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.parallel_config.worker_cls,
|
||||
"vllm_ascend.worker.multi_step_worker.MultiStepWorker",
|
||||
)
|
||||
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
@patch("vllm.envs.VLLM_USE_V1", False)
|
||||
def test_check_and_update_config_default_worker_config(
|
||||
self, mock_init_ascend, mock_check_ascend):
|
||||
mock_init_ascend.return_value = self.mock_ascend_config
|
||||
self.mock_vllm_config.parallel_config.worker_cls = "auto"
|
||||
self.mock_vllm_config.scheduler_config.is_multi_step = False
|
||||
|
||||
from vllm_ascend import platform
|
||||
|
||||
importlib.reload(platform)
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.parallel_config.worker_cls,
|
||||
"vllm_ascend.worker.worker.NPUWorker",
|
||||
)
|
||||
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=True)
|
||||
|
||||
Reference in New Issue
Block a user