[Spec Decode]clean up spec decode interface (#6947)
This pull request refactors the speculative decoding proposer interface
to align with upstream vLLM, removing the local `Proposer` interface and
renaming methods to `propose`.
This is the first step. In the future we should remove the class
register and just add few Ascend specified method once the arch in vLLM
is ready.
- vLLM version: v0.16.0
- vLLM main:
15d76f74e2
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -15,11 +15,11 @@ from vllm_ascend.attention.attention_v1 import AscendAttentionState
|
||||
from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
|
||||
from vllm_ascend.compilation.acl_graph import ACLGraphWrapper
|
||||
from vllm_ascend.ops.rotary_embedding import get_cos_and_sin_mla, update_cos_sin
|
||||
from vllm_ascend.spec_decode.eagle_proposer import EagleProposer
|
||||
from vllm_ascend.spec_decode.eagle_proposer import AscendEagleProposer
|
||||
from vllm_ascend.utils import lmhead_tp_enable
|
||||
|
||||
|
||||
class MtpProposer(EagleProposer):
|
||||
class AscendMtpProposer(AscendEagleProposer):
|
||||
# TODO: Find out why ModelRunner does not this explicit typing?
|
||||
model: nn.Module | ACLGraphWrapper
|
||||
|
||||
|
||||
Reference in New Issue
Block a user