[BugFix]Backport validate pd mode feature gates no fused mc2 v0.18.0 clean (#8583)
### What this PR does / why we need it?

Backport validate pd mode feature gates no fused mc2 v0.18.0 clean (backport of #8582).

---------

Signed-off-by: wangxiaoteng <wangxiaoteng@huawei.com>
This commit is contained in:
@@ -541,134 +541,6 @@ class TestNPUPlatform(TestBase):
|
||||
):
|
||||
self.platform.check_and_update_config(vllm_config)
|
||||
|
||||
@patch("vllm_ascend.quantization.utils.maybe_auto_detect_quantization")
@patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType.A3)
@patch("vllm_ascend.ascend_config.init_ascend_config")
@patch("vllm_ascend.core.recompute_scheduler.RecomputeSchedulerConfig.initialize_from_config")
def test_check_and_update_config_fused_mc2_rejects_pd_mixed_no_kv_transfer(
    self, mock_init_recompute, mock_init_ascend, mock_soc_version, mock_auto_detect
):
    """Expect a ValueError when fused MC2 is enabled and kv_transfer_config is None.

    The device type is patched to A3 and VLLM_ASCEND_ENABLE_FUSED_MC2 is
    patched to 1; check_and_update_config must then raise a ValueError whose
    message matches the pattern asserted below.
    """
    # Ascend config returned by the patched init_ascend_config.
    ascend_cfg = TestNPUPlatform.mock_vllm_ascend_config()
    ascend_cfg.enable_mc2_hierarchy_comm = False
    ascend_cfg.recompute_scheduler_enable = False
    mock_init_ascend.return_value = ascend_cfg
    mock_init_recompute.return_value = MagicMock()

    # vLLM config without any KV-transfer settings and all parallel sizes at 1.
    cfg = TestNPUPlatform.mock_vllm_config()
    cfg.kv_transfer_config = None
    parallel = cfg.parallel_config
    for size_attr in (
        "decode_context_parallel_size",
        "prefill_context_parallel_size",
        "tensor_parallel_size",
    ):
        setattr(parallel, size_attr, 1)
    cfg.scheduler_config = MagicMock()

    from vllm_ascend import platform

    # Reload the module before instantiating the platform under test.
    importlib.reload(platform)
    self.platform = platform.NPUPlatform()

    expected_message = r"VLLM_ASCEND_ENABLE_FUSED_MC2.*kv_role='kv_consumer'.*PD-mixed"
    with (
        patch("vllm_ascend.platform.envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2", 1, create=True),
        pytest.raises(ValueError, match=expected_message),
        # _fix_incompatible_config is replaced by a mock for the duration of the call.
        patch.object(platform.NPUPlatform, "_fix_incompatible_config"),
    ):
        self.platform.check_and_update_config(cfg)
|
||||
|
||||
@patch("vllm_ascend.quantization.utils.maybe_auto_detect_quantization")
@patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType.A3)
@patch("vllm_ascend.ascend_config.init_ascend_config")
@patch("vllm_ascend.core.recompute_scheduler.RecomputeSchedulerConfig.initialize_from_config")
def test_check_and_update_config_fused_mc2_rejects_pd_mixed_kv_both(
    self, mock_init_recompute, mock_init_ascend, mock_soc_version, mock_auto_detect
):
    """Expect a ValueError when fused MC2 is enabled and kv_role is 'kv_both'.

    The device type is patched to A3 and VLLM_ASCEND_ENABLE_FUSED_MC2 is
    patched to 1; check_and_update_config must then raise a ValueError whose
    message matches the pattern asserted below.
    """
    # Ascend config returned by the patched init_ascend_config.
    ascend_cfg = TestNPUPlatform.mock_vllm_ascend_config()
    ascend_cfg.enable_mc2_hierarchy_comm = False
    ascend_cfg.recompute_scheduler_enable = False
    mock_init_ascend.return_value = ascend_cfg
    mock_init_recompute.return_value = MagicMock()

    # vLLM config whose KV-transfer role is "kv_both", all parallel sizes at 1.
    cfg = TestNPUPlatform.mock_vllm_config()
    cfg.kv_transfer_config = MagicMock(kv_role="kv_both", engine_id="engine0")
    parallel = cfg.parallel_config
    for size_attr in (
        "decode_context_parallel_size",
        "prefill_context_parallel_size",
        "tensor_parallel_size",
    ):
        setattr(parallel, size_attr, 1)
    cfg.scheduler_config = MagicMock()

    from vllm_ascend import platform

    # Reload the module before instantiating the platform under test.
    importlib.reload(platform)
    self.platform = platform.NPUPlatform()

    expected_message = r"VLLM_ASCEND_ENABLE_FUSED_MC2.*kv_role='kv_consumer'.*kv_role='kv_both'"
    with (
        patch("vllm_ascend.platform.envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2", 1, create=True),
        pytest.raises(ValueError, match=expected_message),
        # Both helpers are replaced by mocks for the duration of the call.
        patch.object(platform.NPUPlatform, "_fix_incompatible_config"),
        patch.object(platform, "check_kv_extra_config"),
    ):
        self.platform.check_and_update_config(cfg)
|
||||
|
||||
@patch("vllm_ascend.quantization.utils.maybe_auto_detect_quantization")
@patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType.A3)
@patch("vllm_ascend.ascend_config.init_ascend_config")
@patch("vllm_ascend.core.recompute_scheduler.RecomputeSchedulerConfig.initialize_from_config")
def test_check_and_update_config_fused_mc2_rejects_pd_disaggregated_kv_producer(
    self, mock_init_recompute, mock_init_ascend, mock_soc_version, mock_auto_detect
):
    """Expect a ValueError when fused MC2 is enabled and kv_role is 'kv_producer'.

    The device type is patched to A3 and VLLM_ASCEND_ENABLE_FUSED_MC2 is
    patched to 1; check_and_update_config must then raise a ValueError whose
    message matches the pattern asserted below.
    """
    # Ascend config returned by the patched init_ascend_config.
    ascend_cfg = TestNPUPlatform.mock_vllm_ascend_config()
    ascend_cfg.enable_mc2_hierarchy_comm = False
    ascend_cfg.recompute_scheduler_enable = False
    mock_init_ascend.return_value = ascend_cfg
    mock_init_recompute.return_value = MagicMock()

    # vLLM config whose KV-transfer role is "kv_producer", all parallel sizes at 1.
    cfg = TestNPUPlatform.mock_vllm_config()
    cfg.kv_transfer_config = MagicMock(kv_role="kv_producer", engine_id="engine0")
    parallel = cfg.parallel_config
    for size_attr in (
        "decode_context_parallel_size",
        "prefill_context_parallel_size",
        "tensor_parallel_size",
    ):
        setattr(parallel, size_attr, 1)
    cfg.scheduler_config = MagicMock()

    from vllm_ascend import platform

    # Reload the module before instantiating the platform under test.
    importlib.reload(platform)
    self.platform = platform.NPUPlatform()

    expected_message = r"VLLM_ASCEND_ENABLE_FUSED_MC2.*kv_role='kv_consumer'.*kv_role='kv_producer'"
    with (
        patch("vllm_ascend.platform.envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2", 1, create=True),
        pytest.raises(ValueError, match=expected_message),
        # Both helpers are replaced by mocks for the duration of the call.
        patch.object(platform.NPUPlatform, "_fix_incompatible_config"),
        patch.object(platform, "check_kv_extra_config"),
    ):
        self.platform.check_and_update_config(cfg)
|
||||
|
||||
@patch("vllm_ascend.quantization.utils.maybe_auto_detect_quantization")
@patch("vllm_ascend.utils.get_ascend_device_type", return_value=AscendDeviceType.A3)
@patch("vllm_ascend.ascend_config.init_ascend_config")
@patch("vllm_ascend.core.recompute_scheduler.RecomputeSchedulerConfig.initialize_from_config")
def test_check_and_update_config_fused_mc2_allows_pd_disaggregated_kv_consumer(
    self, mock_init_recompute, mock_init_ascend, mock_soc_version, mock_auto_detect
):
    """Check that fused MC2 with kv_role 'kv_consumer' does not raise.

    The device type is patched to A3 and VLLM_ASCEND_ENABLE_FUSED_MC2 is
    patched to 1; the test passes as long as check_and_update_config returns
    without raising.
    """
    # Ascend config returned by the patched init_ascend_config.
    ascend_cfg = TestNPUPlatform.mock_vllm_ascend_config()
    ascend_cfg.enable_mc2_hierarchy_comm = False
    ascend_cfg.recompute_scheduler_enable = False
    mock_init_ascend.return_value = ascend_cfg
    mock_init_recompute.return_value = MagicMock()

    # vLLM config whose KV-transfer role is "kv_consumer", all parallel sizes at 1.
    cfg = TestNPUPlatform.mock_vllm_config()
    cfg.kv_transfer_config = MagicMock(kv_role="kv_consumer", engine_id="engine0")
    parallel = cfg.parallel_config
    for size_attr in (
        "decode_context_parallel_size",
        "prefill_context_parallel_size",
        "tensor_parallel_size",
    ):
        setattr(parallel, size_attr, 1)
    cfg.scheduler_config = MagicMock()

    from vllm_ascend import platform

    # Reload the module before instantiating the platform under test.
    importlib.reload(platform)
    self.platform = platform.NPUPlatform()

    with (
        patch("vllm_ascend.platform.envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2", 1, create=True),
        # Both helpers are replaced by mocks for the duration of the call.
        patch.object(platform.NPUPlatform, "_fix_incompatible_config"),
        patch.object(platform, "check_kv_extra_config"),
    ):
        self.platform.check_and_update_config(cfg)
|
||||
|
||||
def test_update_block_size_for_backend_preserves_hybrid_block_size(self):
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
vllm_config.model_config.is_hybrid = True
|
||||
|
||||
Reference in New Issue
Block a user