[Bugfix] Reset incompatible config (#6005)

### What this PR does / why we need it?

This PR introduces compatibility fixes for running vLLM on Ascend NPU hardware. The changes ensure that GPU-specific parameters are automatically detected and reset to Ascend-compatible values, with appropriate warnings logged.

| Module | Parameter | Default Value |
|--------|-----------|---------------|
| Model Config | `disable_cascade_attn` | `False` |
| Parallel Config | `all2all_backend` | `"allgather_reducescatter"` |
| Cache Config | `cpu_kvcache_space_bytes` | `None` |
| MultiModal Config | `mm_encoder_attn_backend` | `None` |
| Observability Config | `enable_layerwise_nvtx_tracing` | `False` |
| Scheduler Config | `max_num_partial_prefills` | `1` |
| Speculative Config | `quantization` | `None` |
| KV Transfer Config | `kv_buffer_size` | `1e9` |
| KV Transfer Config | `enable_permute_local_kv` | `False` |
| Attention Config | `use_prefill_decode_attention` | `False` |
| Attention Config | `use_cudnn_prefill` | `False` |
| Attention Config | `use_trtllm_ragged_deepseek_prefill` | `False` |
| Attention Config | `use_trtllm_attention` | `False` |
| Attention Config | `disable_flashinfer_prefill` | `False` |
| Attention Config | `disable_flashinfer_q_quantization` | `False` |
| Attention Config | `flash_attn_version` | `None` |
| Attention Config | `backend` | `None` |
| Attention Config | `flash_attn_max_num_splits_for_cuda_graph` | `32` |

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.13.0
- vLLM main: 2c24bc6996

Signed-off-by: hfadzxy <starmoon_zhang@163.com>
2026-01-20 11:02:38 +08:00
parent a8576ec610
commit a5b099c73d
4 changed files with 242 additions and 154 deletions
--- a/tests/ut/test_ascend_config.py
+++ b/tests/ut/test_ascend_config.py
@@ -13,18 +13,17 @@
 # This file is a part of the vllm-ascend project.
 #

+from unittest.mock import patch
+
 from vllm.config import VllmConfig

 from tests.ut.base import TestBase
-from vllm_ascend.ascend_config import (clear_ascend_config, get_ascend_config,
-                                       init_ascend_config)
+from vllm_ascend.ascend_config import clear_ascend_config, get_ascend_config, init_ascend_config


 class TestAscendConfig(TestBase):
-
    @staticmethod
    def _clean_up_ascend_config(func):
-
        def wrapper(*args, **kwargs):
            clear_ascend_config()
            func(*args, **kwargs)
@@ -33,7 +32,8 @@ class TestAscendConfig(TestBase):
        return wrapper

    @_clean_up_ascend_config
-    def test_init_ascend_config_without_additional_config(self):
+    @patch("vllm_ascend.platform.NPUPlatform._fix_incompatible_config")
+    def test_init_ascend_config_without_additional_config(self, mock_fix_incompatible_config):
        test_vllm_config = VllmConfig()
        # No additional config given, check the default value here.
        ascend_config = init_ascend_config(test_vllm_config)
@@ -47,7 +47,8 @@ class TestAscendConfig(TestBase):
        self.assertTrue(ascend_fusion_config.fusion_ops_gmmswigluquant)

    @_clean_up_ascend_config
-    def test_init_ascend_config_with_additional_config(self):
+    @patch("vllm_ascend.platform.NPUPlatform._fix_incompatible_config")
+    def test_init_ascend_config_with_additional_config(self, mock_fix_incompatible_config):
        test_vllm_config = VllmConfig()
        test_vllm_config.additional_config = {
            "ascend_compilation_config": {
@@ -57,11 +58,9 @@ class TestAscendConfig(TestBase):
                "fusion_ops_gmmswigluquant": False,
            },
            "multistream_overlap_shared_expert": True,
-            "eplb_config": {
-                "num_redundant_experts": 2
-            },
+            "eplb_config": {"num_redundant_experts": 2},
            "refresh": True,
-            "enable_kv_nz": False
+            "enable_kv_nz": False,
        }
        ascend_config = init_ascend_config(test_vllm_config)
        self.assertEqual(ascend_config.eplb_config.num_redundant_experts, 2)
@@ -76,7 +75,8 @@ class TestAscendConfig(TestBase):
        self.assertFalse(ascend_fusion_config.fusion_ops_gmmswigluquant)

    @_clean_up_ascend_config
-    def test_init_ascend_config_enable_npugraph_ex(self):
+    @patch("vllm_ascend.platform.NPUPlatform._fix_incompatible_config")
+    def test_init_ascend_config_enable_npugraph_ex(self, mock_fix_incompatible_config):
        test_vllm_config = VllmConfig()
        test_vllm_config.additional_config = {
            "enable_npugraph_ex": True,
@@ -86,7 +86,8 @@ class TestAscendConfig(TestBase):
        self.assertTrue(ascend_config.enable_npugraph_ex)

    @_clean_up_ascend_config
-    def test_get_ascend_config(self):
+    @patch("vllm_ascend.platform.NPUPlatform._fix_incompatible_config")
+    def test_get_ascend_config(self, mock_fix_incompatible_config):
        test_vllm_config = VllmConfig()
        ascend_config = init_ascend_config(test_vllm_config)
        self.assertEqual(get_ascend_config(), ascend_config)
@@ -97,7 +98,8 @@ class TestAscendConfig(TestBase):
            get_ascend_config()

    @_clean_up_ascend_config
-    def test_clear_ascend_config(self):
+    @patch("vllm_ascend.platform.NPUPlatform._fix_incompatible_config")
+    def test_clear_ascend_config(self, mock_fix_incompatible_config):
        test_vllm_config = VllmConfig()
        ascend_config = init_ascend_config(test_vllm_config)
        self.assertEqual(get_ascend_config(), ascend_config)