From 05cc03d78557754e87e0dc7999d87da4e9f6e544 Mon Sep 17 00:00:00 2001
From: zhangxinyuehfad <59153331+zhangxinyuehfad@users.noreply.github.com>
Date: Tue, 3 Feb 2026 10:32:02 +0800
Subject: [PATCH] [Bugfix] fix hash conflict due to resetting incompatible
 configurations (#6368)

### What this PR does / why we need it?
`NPUPlatform.check_and_update_config` force-reset `parallel_config.all2all_backend` to `"allgather_reducescatter"`. Mutating the config this way can change its hash after other components have already derived one, causing a hash conflict. This PR removes the `all2all_backend` reset, guards the `cpu_kvcache_space_bytes` reset behind `if vllm_config.cache_config:`, and renumbers the remaining sections.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.14.1
- vLLM main: https://github.com/vllm-project/vllm/commit/dc917cceb877dfd13f98c538c4c96158047d98bd

Signed-off-by: hfadzxy
---
 vllm_ascend/platform.py | 28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py
index ecd435df..75329017 100644
--- a/vllm_ascend/platform.py
+++ b/vllm_ascend/platform.py
@@ -658,20 +658,8 @@ class NPUPlatform(Platform):
             )
             model_config.disable_cascade_attn = False
 
-        # ==================== 2. Parallel Config ====================
-        if vllm_config.parallel_config:
-            # Only allow the default all2all backend; others like deepep are not supported
-            default_backend = "allgather_reducescatter"
-            current_backend = getattr(vllm_config.parallel_config, "all2all_backend", default_backend)
-            if current_backend != default_backend:
-                logger.warning(
-                    "Parameter '--all2all-backend' is set to '%s', which may be "
-                    "incompatible with Ascend. Using internal plugin mechanisms.",
-                    current_backend,
-                )
-                vllm_config.parallel_config.all2all_backend = default_backend
-
-        # ==================== 3. Cache Config ====================
+        # ==================== 2. Cache Config ====================
+        if vllm_config.cache_config:
             # Check and reset cpu_kvcache_space_bytes
             if getattr(vllm_config.cache_config, "cpu_kvcache_space_bytes", False):
                 logger.warning(
@@ -679,7 +667,7 @@
                 )
                 vllm_config.cache_config.cpu_kvcache_space_bytes = None
 
-        # ==================== 4. MultiModal Config ====================
+        # ==================== 3. MultiModal Config ====================
         multimodal_config = getattr(model_config, "multimodal_config", None) if model_config else None
         if multimodal_config:
             # Ascend uses a different mechanism for Multi-Modal attention
@@ -690,7 +678,7 @@
             logger.warning(
             )
             multimodal_config.mm_encoder_attn_backend = None
-        # ==================== 5. Observability Config ====================
+        # ==================== 4. Observability Config ====================
         if vllm_config.observability_config:
             # NVTX tracing is NVIDIA specific
             if getattr(vllm_config.observability_config, "enable_layerwise_nvtx_tracing", False):
@@ -700,7 +688,7 @@
                 logger.warning(
                 )
                 vllm_config.observability_config.enable_layerwise_nvtx_tracing = False
-        # ==================== 6. Scheduler Config ====================
+        # ==================== 5. Scheduler Config ====================
         if vllm_config.scheduler_config:
             # Partial prefills are specific to ROCm optimization
             if getattr(vllm_config.scheduler_config, "max_num_partial_prefills", 1) != 1:
@@ -709,7 +697,7 @@
                 logger.warning(
                 )
                 vllm_config.scheduler_config.max_num_partial_prefills = 1
-        # ==================== 7. Speculative Config ====================
+        # ==================== 6. Speculative Config ====================
         if vllm_config.speculative_config:
             # Ascend automatically inherits main model quantization
             if getattr(vllm_config.speculative_config, "quantization", None) is not None:
@@ -719,7 +707,7 @@
                 logger.warning(
                 )
                 vllm_config.speculative_config.quantization = None
-        # ==================== 8. KV Transfer Config ====================
+        # ==================== 7. KV Transfer Config ====================
         if vllm_config.kv_transfer_config:
             # Buffer size is primarily tied to NCCL (GPU) backends
             current_buffer_size = getattr(vllm_config.kv_transfer_config, "kv_buffer_size", 1e9)
@@ -739,7 +727,7 @@
             )
             vllm_config.kv_transfer_config.enable_permute_local_kv = False
 
-        # ==================== 9. Attention Config ====================
+        # ==================== 8. Attention Config ====================
         if vllm_config.attention_config:
             att_config = vllm_config.attention_config
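
The failure mode named in the title is easy to reproduce outside vLLM. The sketch below is a minimal standalone illustration, not vLLM code: `ToyParallelConfig` and `config_hash` are hypothetical stand-ins for a config object and a cache key derived from it. Any hook that mutates a config field after some component has already hashed the config yields two different keys for the same logical run, which is presumably why this patch drops the `all2all_backend` reset entirely instead of reordering it.

```python
from dataclasses import dataclass, asdict
import hashlib
import json

@dataclass
class ToyParallelConfig:
    # Hypothetical stand-in for a parallel config; the field name mirrors
    # the one the removed hunk used to overwrite.
    all2all_backend: str = "deepep"

def config_hash(cfg: ToyParallelConfig) -> str:
    # Hash every field, the way a cache key derived from the config would.
    payload = json.dumps(asdict(cfg), sort_keys=True).encode()
    return hashlib.sha256(payload).hexdigest()

cfg = ToyParallelConfig()
before = config_hash(cfg)   # key computed by whatever ran first

# A platform hook later "fixes" the config in place...
cfg.all2all_backend = "allgather_reducescatter"
after = config_hash(cfg)    # key computed by anything that runs afterwards

assert before != after      # same logical run, two conflicting keys
print(before[:12], "!=", after[:12])
```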
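
For the guard the patch does add, the shape is the usual check-warn-reset pattern. Below is a sketch under stated assumptions: `reset_cpu_kvcache_space` is an illustrative helper, not a function in the repo, and the warning text is invented for the example because the hunks above elide the real message.

```python
import logging

logger = logging.getLogger("vllm_ascend.platform")

def reset_cpu_kvcache_space(vllm_config) -> None:
    # Mirrors the patched hunk: only touch cache_config when it exists,
    # warn, then clear the field the NPU backend does not support.
    if vllm_config.cache_config:
        if getattr(vllm_config.cache_config, "cpu_kvcache_space_bytes", False):
            logger.warning(
                "cpu_kvcache_space_bytes is not supported on NPU; "
                "resetting it to None.")  # hypothetical text; elided in the patch
            vllm_config.cache_config.cpu_kvcache_space_bytes = None
```

Running any such reset once, before the first hash is taken, keeps every consumer of the config hashing the same object state.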