From 099255e933206491f1a87505e100cfef56d16668 Mon Sep 17 00:00:00 2001 From: linfeng-yuan <1102311262@qq.com> Date: Wed, 15 Oct 2025 17:13:27 +0800 Subject: [PATCH] [bugfix] fix pipeline parallel for mla & sfa attention backend (#3459) ### What this PR does / why we need it? Fix the pipeline-parallel breakage in the MLA and SFA attention backends, caused by a hard-coded layer index `0` in their metadata builders when fetching the rotary-embedding cos/sin caches; the builders now index the layers with `model.model.start_layer` instead. The error reported before this fix: `AttributeError: 'PPMissingLayer' object has no attribute 'self_attn'` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This PR was tested with the "mp" backend (PP2TP8 on an A3 node) as well as the "ray" backend (PP2TP8 on two A2 nodes). - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 --------- Signed-off-by: linfeng-yuan <1102311262@qq.com> --- tests/e2e/multicard/test_pipeline_parallel.py | 1 + vllm_ascend/attention/mla_v1.py | 4 ++-- vllm_ascend/attention/sfa_v1.py | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/e2e/multicard/test_pipeline_parallel.py b/tests/e2e/multicard/test_pipeline_parallel.py index 03774db..fa21fe8 100644 --- a/tests/e2e/multicard/test_pipeline_parallel.py +++ b/tests/e2e/multicard/test_pipeline_parallel.py @@ -20,6 +20,7 @@ from tests.e2e.conftest import VllmRunner MODELS = [ "Qwen/Qwen3-0.6B", + "deepseek-ai/DeepSeek-V2-Lite-Chat", ] TENSOR_PARALLELS = [1] diff --git a/vllm_ascend/attention/mla_v1.py b/vllm_ascend/attention/mla_v1.py index c8379b7..819edcb 100644 --- a/vllm_ascend/attention/mla_v1.py +++ b/vllm_ascend/attention/mla_v1.py @@ -314,9 +314,9 @@ class AscendMLAMetadataBuilder: if self.cos_cache is None: self.cos_cache = model.model.layers[ - 0].self_attn.rotary_emb.cos_cached + model.model.start_layer].self_attn.rotary_emb.cos_cached self.sin_cache = model.model.layers[ - 0].self_attn.rotary_emb.sin_cached + model.model.start_layer].self_attn.rotary_emb.sin_cached if self.cos_cache.dtype != self.model_config.dtype: # type: ignore 
self.cos_cache = self.cos_cache.to( # type: ignore self.model_config.dtype) # type: ignore diff --git a/vllm_ascend/attention/sfa_v1.py b/vllm_ascend/attention/sfa_v1.py index 55282c8..edbd7cc 100644 --- a/vllm_ascend/attention/sfa_v1.py +++ b/vllm_ascend/attention/sfa_v1.py @@ -307,9 +307,9 @@ class AscendSFAMetadataBuilder: if self.cos_cache is None: self.cos_cache = model.model.layers[ - 0].self_attn.rotary_emb.cos_cached + model.model.start_layer].self_attn.rotary_emb.cos_cached self.sin_cache = model.model.layers[ - 0].self_attn.rotary_emb.sin_cached + model.model.start_layer].self_attn.rotary_emb.sin_cached if self.cos_cache.dtype != self.model_config.dtype: # type: ignore self.cos_cache = self.cos_cache.to( # type: ignore self.model_config.dtype) # type: ignore