Drop torchair (#4814)
aclgraph is now stable and fast, so let's drop the torchair graph mode.
TODO: the remaining logic that adapts to torchair should be cleaned up as
well. We'll do that in a follow-up PR.
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Co-authored-by: Mengqing Cao <cmq0113@163.com>
This commit is contained in:
@@ -20,21 +20,14 @@ from vllm_ascend.spec_decode.eagle_proposer import EagleProposer
|
||||
from vllm_ascend.spec_decode.mtp_proposer import MtpProposer
|
||||
from vllm_ascend.spec_decode.ngram_proposer import NgramProposer
|
||||
from vllm_ascend.spec_decode.suffix_proposer import SuffixDecodingProposer
|
||||
from vllm_ascend.torchair.torchair_mtp_proposer import TorchairMtpProposer
|
||||
|
||||
|
||||
def get_spec_decode_method(method,
|
||||
vllm_config,
|
||||
device,
|
||||
runner,
|
||||
is_torchair_graph=False):
|
||||
def get_spec_decode_method(method, vllm_config, device, runner):
|
||||
if method == "ngram":
|
||||
return NgramProposer(vllm_config, device, runner)
|
||||
elif method in ("eagle", "eagle3"):
|
||||
return EagleProposer(vllm_config, device, runner)
|
||||
elif method == "mtp":
|
||||
if is_torchair_graph:
|
||||
return TorchairMtpProposer(vllm_config, device, runner)
|
||||
return MtpProposer(vllm_config, device, runner)
|
||||
elif method == 'suffix':
|
||||
return SuffixDecodingProposer(vllm_config, device, runner)
|
||||
|
||||
Reference in New Issue
Block a user