[Spec Decode] clean up spec decode interface (#6947)
This pull request refactors the speculative decoding proposer interface
to align with upstream vLLM, removing the local `Proposer` interface and
renaming methods to `propose`.
This is the first step. In the future we should remove the class
register and add just a few Ascend-specific methods once the architecture
in vLLM is ready.
- vLLM version: v0.16.0
- vLLM main: 15d76f74e2
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -16,23 +16,23 @@
|
||||
# This file is a part of the vllm-ascend project.
|
||||
# Adapted from vllm-project/vllm/vllm/worker/gpu_model_runner.py
|
||||
#
|
||||
from vllm_ascend.spec_decode.eagle_proposer import EagleProposer
|
||||
from vllm_ascend.spec_decode.medusa_proposer import MedusaProposer
|
||||
from vllm_ascend.spec_decode.mtp_proposer import MtpProposer
|
||||
from vllm_ascend.spec_decode.ngram_proposer import NgramProposer
|
||||
from vllm_ascend.spec_decode.suffix_proposer import SuffixDecodingProposer
|
||||
from vllm_ascend.spec_decode.eagle_proposer import AscendEagleProposer
|
||||
from vllm_ascend.spec_decode.medusa_proposer import AscendMedusaProposer
|
||||
from vllm_ascend.spec_decode.mtp_proposer import AscendMtpProposer
|
||||
from vllm_ascend.spec_decode.ngram_proposer import AscendNgramProposer
|
||||
from vllm_ascend.spec_decode.suffix_proposer import AscendSuffixDecodingProposer
|
||||
|
||||
|
||||
def get_spec_decode_method(method, vllm_config, device, runner):
|
||||
if method == "ngram":
|
||||
return NgramProposer(vllm_config, device, runner)
|
||||
elif method in ("eagle", "eagle3"):
|
||||
return EagleProposer(vllm_config, device, runner)
|
||||
elif method == "mtp":
|
||||
return MtpProposer(vllm_config, device, runner)
|
||||
return AscendNgramProposer(vllm_config, runner)
|
||||
elif method == "suffix":
|
||||
return SuffixDecodingProposer(vllm_config, device, runner)
|
||||
return AscendSuffixDecodingProposer(vllm_config, runner)
|
||||
elif method == "medusa":
|
||||
return MedusaProposer(vllm_config, device, runner)
|
||||
return AscendMedusaProposer(vllm_config, device)
|
||||
elif method in ("eagle", "eagle3"):
|
||||
return AscendEagleProposer(vllm_config, device, runner)
|
||||
elif method == "mtp":
|
||||
return AscendMtpProposer(vllm_config, device, runner)
|
||||
else:
|
||||
raise ValueError(f"Unknown speculative decoding method: {method}")
|
||||
|
||||
Reference in New Issue
Block a user