From 05cc03d78557754e87e0dc7999d87da4e9f6e544 Mon Sep 17 00:00:00 2001
From: zhangxinyuehfad <59153331+zhangxinyuehfad@users.noreply.github.com>
Date: Tue, 3 Feb 2026 10:32:02 +0800
Subject: [PATCH] [Bugfix] fix hash conflict due to resetting incompatible
 configurations (#6368)

### What this PR does / why we need it?
`NPUPlatform.check_and_update_config` force-reset `parallel_config.all2all_backend` to `"allgather_reducescatter"`. Mutating the config this way can change its hash after other components have already derived one, causing a hash conflict. This PR removes the `all2all_backend` reset, guards the `cpu_kvcache_space_bytes` reset behind `if vllm_config.cache_config:`, and renumbers the remaining sections.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.14.1
- vLLM main: https://github.com/vllm-project/vllm/commit/dc917cceb877dfd13f98c538c4c96158047d98bd

Signed-off-by: hfadzxy
---
 vllm_ascend/platform.py | 28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py
index ecd435df..75329017 100644
--- a/vllm_ascend/platform.py
+++ b/vllm_ascend/platform.py
@@ -658,20 +658,8 @@ class NPUPlatform(Platform):
             )
             model_config.disable_cascade_attn = False
 
-        # ==================== 2. Parallel Config ====================
-        if vllm_config.parallel_config:
-            # Only allow the default all2all backend; others like deepep are not supported
-            default_backend = "allgather_reducescatter"
-            current_backend = getattr(vllm_config.parallel_config, "all2all_backend", default_backend)
-            if current_backend != default_backend:
-                logger.warning(
-                    "Parameter '--all2all-backend' is set to '%s', which may be "
-                    "incompatible with Ascend. Using internal plugin mechanisms.",
-                    current_backend,
-                )
-                vllm_config.parallel_config.all2all_backend = default_backend
-
-        # ==================== 3. Cache Config ====================
+        # ==================== 2. Cache Config ====================
+        if vllm_config.cache_config:
             # Check and reset cpu_kvcache_space_bytes
             if getattr(vllm_config.cache_config, "cpu_kvcache_space_bytes", False):
                 logger.warning(
@@ -679,7 +667,7 @@
                 )
                 vllm_config.cache_config.cpu_kvcache_space_bytes = None
 
-        # ==================== 4. MultiModal Config ====================
+        # ==================== 3. MultiModal Config ====================
         multimodal_config = getattr(model_config, "multimodal_config", None) if model_config else None
         if multimodal_config:
             # Ascend uses a different mechanism for Multi-Modal attention
@@ -690,7 +678,7 @@
             logger.warning(
             )
             multimodal_config.mm_encoder_attn_backend = None
-        # ==================== 5. Observability Config ====================
+        # ==================== 4. Observability Config ====================
         if vllm_config.observability_config:
             # NVTX tracing is NVIDIA specific
             if getattr(vllm_config.observability_config, "enable_layerwise_nvtx_tracing", False):
@@ -700,7 +688,7 @@
                 logger.warning(
                 )
                 vllm_config.observability_config.enable_layerwise_nvtx_tracing = False
-        # ==================== 6. Scheduler Config ====================
+        # ==================== 5. Scheduler Config ====================
         if vllm_config.scheduler_config:
             # Partial prefills are specific to ROCm optimization
             if getattr(vllm_config.scheduler_config, "max_num_partial_prefills", 1) != 1:
@@ -709,7 +697,7 @@
                 logger.warning(
                 )
                 vllm_config.scheduler_config.max_num_partial_prefills = 1
-        # ==================== 7. Speculative Config ====================
+        # ==================== 6. Speculative Config ====================
         if vllm_config.speculative_config:
             # Ascend automatically inherits main model quantization
             if getattr(vllm_config.speculative_config, "quantization", None) is not None:
@@ -719,7 +707,7 @@
                 logger.warning(
                 )
                 vllm_config.speculative_config.quantization = None
-        # ==================== 8. KV Transfer Config ====================
+        # ==================== 7. KV Transfer Config ====================
         if vllm_config.kv_transfer_config:
             # Buffer size is primarily tied to NCCL (GPU) backends
             current_buffer_size = getattr(vllm_config.kv_transfer_config, "kv_buffer_size", 1e9)
@@ -739,7 +727,7 @@
             )
             vllm_config.kv_transfer_config.enable_permute_local_kv = False
 
-        # ==================== 9. Attention Config ====================
+        # ==================== 8. Attention Config ====================
         if vllm_config.attention_config:
             att_config = vllm_config.attention_config
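
The failure mode named in the title is easy to reproduce outside vLLM. The sketch below is a minimal standalone illustration, not vLLM code: `ToyParallelConfig` and `config_hash` are hypothetical stand-ins for a config object and a cache key derived from it. Any hook that mutates a config field after some component has already hashed the config yields two different keys for the same logical run, which is presumably why this patch drops the `all2all_backend` reset entirely instead of reordering it.

```python
from dataclasses import dataclass, asdict
import hashlib
import json

@dataclass
class ToyParallelConfig:
    # Hypothetical stand-in for a parallel config; the field name mirrors
    # the one the removed hunk used to overwrite.
    all2all_backend: str = "deepep"

def config_hash(cfg: ToyParallelConfig) -> str:
    # Hash every field, the way a cache key derived from the config would.
    payload = json.dumps(asdict(cfg), sort_keys=True).encode()
    return hashlib.sha256(payload).hexdigest()

cfg = ToyParallelConfig()
before = config_hash(cfg)   # key computed by whatever ran first

# A platform hook later "fixes" the config in place...
cfg.all2all_backend = "allgather_reducescatter"
after = config_hash(cfg)    # key computed by anything that runs afterwards

assert before != after      # same logical run, two conflicting keys
print(before[:12], "!=", after[:12])
```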
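
For the guard the patch does add, the shape is the usual check-warn-reset pattern. Below is a sketch under stated assumptions: `reset_cpu_kvcache_space` is an illustrative helper, not a function in the repo, and the warning text is invented for the example because the hunks above elide the real message.

```python
import logging

logger = logging.getLogger("vllm_ascend.platform")

def reset_cpu_kvcache_space(vllm_config) -> None:
    # Mirrors the patched hunk: only touch cache_config when it exists,
    # warn, then clear the field the NPU backend does not support.
    if vllm_config.cache_config:
        if getattr(vllm_config.cache_config, "cpu_kvcache_space_bytes", False):
            logger.warning(
                "cpu_kvcache_space_bytes is not supported on NPU; "
                "resetting it to None.")  # hypothetical text; elided in the patch
            vllm_config.cache_config.cpu_kvcache_space_bytes = None
```

Running any such reset once, before the first hash is taken, keeps every consumer of the config hashing the same object state.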