Fix VL PD smoke error (#5103)
### What this PR does / why we need it?
Fixes the Mooncake PD (prefill/decode disaggregation) smoke test failure for VL models.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
---------
Signed-off-by: 李少鹏 <lishaopeng21@huawei.com>
```diff
@@ -42,7 +42,7 @@ from vllm.v1.request import RequestStatus
 from vllm_ascend.ascend_config import get_ascend_config, init_ascend_config
 from vllm_ascend.distributed.mooncake_transfer_engine import global_te
 from vllm_ascend.distributed.utils import get_transfer_timeout_value
-from vllm_ascend.utils import prefill_context_parallel_enable
+from vllm_ascend.utils import is_vl_model, prefill_context_parallel_enable
 
 if TYPE_CHECKING:
     from vllm.attention.backends.abstract import AttentionMetadata
@@ -317,6 +317,7 @@ class KVCacheRecvingThread(threading.Thread):
         self.vllm_config = vllm_config
         self.model_config = self.vllm_config.model_config
         self.block_size = self.vllm_config.cache_config.block_size
-        if self.use_mla:
-            self.k_head_dim = self.model_config.hf_config.kv_lora_rank
-            self.v_head_dim = self.model_config.hf_config.qk_rope_head_dim
+        if not is_vl_model(vllm_config):
+            if self.use_mla:
+                self.k_head_dim = self.model_config.hf_config.kv_lora_rank
+                self.v_head_dim = self.model_config.hf_config.qk_rope_head_dim
```