[Refactor] Import global var form vllm instead of overwirte it (#5469)

### What this PR does / why we need it? Import global var form vllm instead of overwirte it, so that we could use the correct global variant value - vLLM version: v0.13.0 - vLLM main: 5326c89803 --------- Signed-off-by: MengqingCao <cmq0113@163.com>
2026-01-07 18:41:45 +08:00
parent 380f089fbf
commit 3f4f2b4ae6
10 changed files with 7 additions and 157 deletions
--- a/vllm_ascend/spec_decode/mtp_proposer.py
+++ b/vllm_ascend/spec_decode/mtp_proposer.py
@@ -9,6 +9,7 @@ from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM
 from vllm.v1.attention.backends.utils import CommonAttentionMetadata
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.sample.metadata import SamplingMetadata
+from vllm.v1.spec_decode.eagle import PADDING_SLOT_ID

 from vllm_ascend.ascend_forward_context import set_ascend_forward_context
 from vllm_ascend.attention.attention_v1 import AscendAttentionState
@@ -18,8 +19,6 @@ from vllm_ascend.ops.rotary_embedding import get_cos_and_sin_mla
 from vllm_ascend.spec_decode.eagle_proposer import EagleProposer
 from vllm_ascend.utils import ProfileExecuteDuration, lmhead_tp_enable

-PADDING_SLOT_ID = -1
-

 class MtpProposer(EagleProposer):