[Spec Decode]clean up spec decode interface (#6947)

This pull request refactors the speculative decoding proposer interface to align with upstream vLLM, removing the local `Proposer` interface and renaming methods to `propose`. This is the first step. In the future we should remove the class register and just add few Ascend specified method once the arch in vLLM is ready. - vLLM version: v0.16.0 - vLLM main: 15d76f74e2 Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2026-03-05 14:30:10 +08:00
parent 2bd9c35788
commit 13777bf3f0
11 changed files with 194 additions and 315 deletions
--- a/vllm_ascend/spec_decode/mtp_proposer.py
+++ b/vllm_ascend/spec_decode/mtp_proposer.py
@@ -15,11 +15,11 @@ from vllm_ascend.attention.attention_v1 import AscendAttentionState
 from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
 from vllm_ascend.compilation.acl_graph import ACLGraphWrapper
 from vllm_ascend.ops.rotary_embedding import get_cos_and_sin_mla, update_cos_sin
-from vllm_ascend.spec_decode.eagle_proposer import EagleProposer
+from vllm_ascend.spec_decode.eagle_proposer import AscendEagleProposer
 from vllm_ascend.utils import lmhead_tp_enable


-class MtpProposer(EagleProposer):
+class AscendMtpProposer(AscendEagleProposer):
    # TODO: Find out why ModelRunner does not this explicit typing?
    model: nn.Module | ACLGraphWrapper