From 099255e933206491f1a87505e100cfef56d16668 Mon Sep 17 00:00:00 2001
From: linfeng-yuan <1102311262@qq.com>
Date: Wed, 15 Oct 2025 17:13:27 +0800
Subject: [PATCH] [bugfix] fix pipeline parallel for mla & sfa attention
 backend (#3459)

### What this PR does / why we need it?
Fix pipeline parallel break for mla & sfa attention backend caused by a
magic number in metadata builder. The error report:
`AttributeError: 'PPMissingLayer' object has no attribute 'self_attn'`

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
This PR was tested with "mp" backend (PP2TP8 on an A3 node) as well as
"ray" backend (PP2TP8 on two A2 nodes).

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

---------

Signed-off-by: linfeng-yuan <1102311262@qq.com>
---
 tests/e2e/multicard/test_pipeline_parallel.py | 1 +
 vllm_ascend/attention/mla_v1.py               | 4 ++--
 vllm_ascend/attention/sfa_v1.py               | 4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/e2e/multicard/test_pipeline_parallel.py b/tests/e2e/multicard/test_pipeline_parallel.py
index 03774db..fa21fe8 100644
--- a/tests/e2e/multicard/test_pipeline_parallel.py
+++ b/tests/e2e/multicard/test_pipeline_parallel.py
@@ -20,6 +20,7 @@ from tests.e2e.conftest import VllmRunner
 
 MODELS = [
     "Qwen/Qwen3-0.6B",
+    "deepseek-ai/DeepSeek-V2-Lite-Chat",
 ]
 
 TENSOR_PARALLELS = [1]
diff --git a/vllm_ascend/attention/mla_v1.py b/vllm_ascend/attention/mla_v1.py
index c8379b7..819edcb 100644
--- a/vllm_ascend/attention/mla_v1.py
+++ b/vllm_ascend/attention/mla_v1.py
@@ -314,9 +314,9 @@ class AscendMLAMetadataBuilder:
 
         if self.cos_cache is None:
             self.cos_cache = model.model.layers[
-                0].self_attn.rotary_emb.cos_cached
+                model.model.start_layer].self_attn.rotary_emb.cos_cached
             self.sin_cache = model.model.layers[
-                0].self_attn.rotary_emb.sin_cached
+                model.model.start_layer].self_attn.rotary_emb.sin_cached
         if self.cos_cache.dtype != self.model_config.dtype:  # type: ignore
             self.cos_cache = self.cos_cache.to(  # type: ignore
                 self.model_config.dtype)  # type: ignore
diff --git a/vllm_ascend/attention/sfa_v1.py b/vllm_ascend/attention/sfa_v1.py
index 55282c8..edbd7cc 100644
--- a/vllm_ascend/attention/sfa_v1.py
+++ b/vllm_ascend/attention/sfa_v1.py
@@ -307,9 +307,9 @@ class AscendSFAMetadataBuilder:
 
         if self.cos_cache is None:
             self.cos_cache = model.model.layers[
-                0].self_attn.rotary_emb.cos_cached
+                model.model.start_layer].self_attn.rotary_emb.cos_cached
             self.sin_cache = model.model.layers[
-                0].self_attn.rotary_emb.sin_cached
+                model.model.start_layer].self_attn.rotary_emb.sin_cached
         if self.cos_cache.dtype != self.model_config.dtype:  # type: ignore
             self.cos_cache = self.cos_cache.to(  # type: ignore
                 self.model_config.dtype)  # type: ignore