unify logic between aclgraph and torchair (#3602)
### What this PR does / why we need it?

Unify logic between aclgraph and torchair.

This is a cherry-pick of https://github.com/vllm-project/vllm-ascend/pull/3560

Signed-off-by: zouyida2052 <zouyida2002@gmail.com>
This commit is contained in:
@@ -502,7 +502,7 @@ class MtpProposer(Proposer):
|
||||
|
||||
# prepare next mtp inputs
|
||||
# mtp>1: prefill skip or decode skip last loop
|
||||
if with_prefill and self.torchair_graph_enabled:
|
||||
if with_prefill:
|
||||
for _ in range(self.num_speculative_tokens - 1):
|
||||
draft_token_ids_list.append(draft_token_ids)
|
||||
if step == self.num_speculative_tokens - 1 or with_prefill:
|
||||
|
||||
Reference in New Issue
Block a user