From 41dadd4312c309232dfdffa8d1c87fe9a93396f0 Mon Sep 17 00:00:00 2001
From: drslark <96540755+drslark@users.noreply.github.com>
Date: Mon, 23 Mar 2026 18:53:07 +0800
Subject: [PATCH] [main][bugfix] Fix the d node getting stuck in the
 pd-separation scenario (#7534)

### What this PR does / why we need it?

This PR fixes the problem of the d node getting stuck in the pd-separation scenario. After being stuck for a long time, the d node eventually crashes at `torch.nn.functional.linear(x, weight, bias)`. We found that the tensor shapes on the individual dp nodes were not aligned, and this is the root cause: with `lmhead_tp_enable()`, the dummy run took the `not is_dummy` branch and skipped the `max_num_reqs_across_dp` sizing that real runs perform, so the dp ranks no longer agreed on tensor shapes. Removing the `is_dummy` special case makes the dummy run take the same code path as a real run.

- vLLM version: v0.18.0
- vLLM main: https://github.com/vllm-project/vllm/commit/4034c3d32e30d01639459edd3ab486f56993876d

Signed-off-by: drslark
---
 vllm_ascend/spec_decode/eagle_proposer.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/vllm_ascend/spec_decode/eagle_proposer.py b/vllm_ascend/spec_decode/eagle_proposer.py
index 35291ec5..403d52a2 100644
--- a/vllm_ascend/spec_decode/eagle_proposer.py
+++ b/vllm_ascend/spec_decode/eagle_proposer.py
@@ -439,7 +439,6 @@ class SpecDecodeBaseProposer(EagleProposer):
             target_positions=model_positions,
             inputs_embeds=None,
             multi_steps_attn_metadata=multi_steps_attn_metadata,
-            is_dummy=True,
             num_tokens=num_tokens,
         )
         forward_context = get_forward_context()
@@ -702,7 +701,6 @@ class SpecDecodeBaseProposer(EagleProposer):
         inputs_embeds,
         multi_steps_attn_metadata,
         num_tokens,
-        is_dummy=False,
         is_prefill=None,
     ) -> torch.Tensor:
         # The lifecycle of `input_ids`, `positions`, `hidden_states` runs through all
@@ -755,7 +753,7 @@
             self.runner.pcp_manager.pcp_allgather_restore_idx.gpu[: last_hidden_states.shape[0]],
         )
 
-        if lmhead_tp_enable() and not is_dummy:
+        if lmhead_tp_enable():
             max_num_reqs_across_dp = (
                 self.vllm_config.scheduler_config.max_num_seqs * self.runner.uniform_decode_query_len
             )
@@ -766,7 +764,7 @@
         sample_hidden_states = last_hidden_states[token_indices_to_sample]
         logits = self.model.compute_logits(sample_hidden_states)
 
-        if lmhead_tp_enable() and num_indices < logits.shape[0] and not is_dummy:
+        if lmhead_tp_enable() and num_indices < logits.shape[0]:
             logits = logits[:num_indices]
             token_indices_to_sample = token_indices_to_sample[:num_indices]
 
@@ -879,7 +877,7 @@ class SpecDecodeBaseProposer(EagleProposer):
         )
         num_indices = token_indices_to_sample.shape[0]
 
-        if lmhead_tp_enable() and not is_dummy:
+        if lmhead_tp_enable():
             max_num_reqs_across_dp = (
                 self.vllm_config.scheduler_config.max_num_seqs * self.runner.uniform_decode_query_len
             )
@@ -891,7 +889,7 @@
         sample_hidden_states = last_hidden_states[token_indices_to_sample]
         logits = self.model.compute_logits(sample_hidden_states)
 
-        if lmhead_tp_enable() and num_indices < logits.shape[0] and not is_dummy:
+        if lmhead_tp_enable() and num_indices < logits.shape[0]:
             logits = logits[:num_indices]
             token_indices_to_sample = token_indices_to_sample[:num_indices]
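
### Root cause, illustrated

To make the root cause concrete, here is a minimal, self-contained sketch of the failure class. It is not code from this repository: the helper `pad_to_max_reqs`, the sizes, and the `gloo` backend are illustrative assumptions. The point it shows is that every dp rank must contribute the same shape to a collective, including on dummy runs.

```python
# Minimal sketch of the shape-alignment requirement across dp ranks.
# Hypothetical example, not code from vllm-ascend. Run with:
#   torchrun --nproc_per_node=2 pad_sketch.py
import torch
import torch.distributed as dist


def pad_to_max_reqs(hidden: torch.Tensor, max_num_reqs_across_dp: int) -> torch.Tensor:
    """Pad the local batch with zero rows so all ranks contribute one shape."""
    pad_rows = max_num_reqs_across_dp - hidden.shape[0]
    return torch.nn.functional.pad(hidden, (0, 0, 0, pad_rows))


def main() -> None:
    dist.init_process_group("gloo")  # CPU backend, enough for a demo
    rank, world = dist.get_rank(), dist.get_world_size()

    hidden_size = 8
    local_num_reqs = 2 + rank        # ranks naturally hold different batch sizes
    max_num_reqs_across_dp = 4       # upper bound all ranks agree on in advance
    hidden = torch.randn(local_num_reqs, hidden_size)

    # Without this padding, rank 0 would offer a (2, 8) tensor and rank 1 a
    # (3, 8) tensor to the same all_gather: the collective hangs or hands a
    # misshapen matrix to the linear() that follows. Skipping the padding only
    # on dummy runs (the old `not is_dummy` branch) reproduces exactly that
    # mismatch whenever a dummy run on one rank overlaps a real run on another.
    hidden = pad_to_max_reqs(hidden, max_num_reqs_across_dp)

    gathered = [torch.empty_like(hidden) for _ in range(world)]
    dist.all_gather(gathered, hidden)
    print(f"rank {rank}: gathered {torch.cat(gathered).shape}")

    dist.destroy_process_group()


if __name__ == "__main__":
    main()
```

The design point mirrors the patch: the shape contract has to hold unconditionally, so the `is_dummy` escape hatch is removed rather than patched around.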