[Feat][Graph] Support MTP for ACL Graph (#2932)
### What this PR does / why we need it?
This PR depends on #2707 being merged first. It adapts the ACL Graph
(aclgraph) functionality to support MTP.
### How was this patch tested?
- vLLM version: v0.10.2
- vLLM main: 2b85697031
---------
Signed-off-by: xuyexiong <xuyexiong@huawei.com>
This commit is contained in:
@@ -306,17 +306,12 @@ class NPUModelRunner(LoRAModelRunnerMixin):
|
||||
self.spec_attn_mask = None
|
||||
self.drafter: Optional[Union[NgramProposer, EagleProposer,
|
||||
MtpProposer]] = None
|
||||
self.actual_seq_lengths_q = []
|
||||
self.actual_seq_lengths_q: list[int] = []
|
||||
self.decode_token_per_req = 1
|
||||
if self.speculative_config:
|
||||
spec_token_num = self.speculative_config.num_speculative_tokens
|
||||
assert spec_token_num > 0
|
||||
self.decode_token_per_req = 1 + spec_token_num
|
||||
self.actual_seq_lengths_q = [
|
||||
len for len in
|
||||
range(self.decode_token_per_req, self.max_num_tokens +
|
||||
1, self.decode_token_per_req)
|
||||
]
|
||||
self.spec_attn_mask = torch.triu(torch.ones(2048,
|
||||
2048,
|
||||
dtype=torch.bool),
|
||||
|
||||
Reference in New Issue
Block a user