[Feat]mtp aclgraph support (#3244)
### What this PR does / why we need it? Currently, MTP Model in deepseek can not be capture in ACLGraph. This PR is use to allow MTP to be captured in ACLGraph mode. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 Signed-off-by: anon189Ty <Stari_Falcon@outlook.com>
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import torch
|
||||
from vllm.config import CUDAGraphMode
|
||||
from vllm.v1.spec_decode.ngram_proposer import \
|
||||
NgramProposer as VllmNgramProposer
|
||||
|
||||
@@ -23,7 +24,9 @@ class NgramProposer(VllmNgramProposer, Proposer):
|
||||
with_prefill=None,
|
||||
skip_attn=None,
|
||||
num_reqs=None,
|
||||
num_tokens_across_dp=None):
|
||||
num_tokens_across_dp=None,
|
||||
aclgraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE,
|
||||
batch_descriptor=None):
|
||||
pass
|
||||
|
||||
def generate_token_ids(self,
|
||||
|
||||
Reference in New Issue
Block a user