[Eagle] reduce one draft forward (#3468)

This commit is contained in:
Ying Sheng
2025-02-10 04:21:49 -08:00
committed by GitHub
parent 2d61132374
commit d23cb9a01e
2 changed files with 5 additions and 1 deletion

View File

@@ -947,7 +947,7 @@ class FlashInferMultiStepDraftBackend:
triton.next_power_of_2(bs),
)
-for i in range(self.speculative_num_steps):
+for i in range(self.speculative_num_steps - 1):
forward_batch.spec_info.kv_indptr = self.kv_indptr[i, : bs + 1]
forward_batch.spec_info.kv_indices = kv_indices_buffer[i][
: seq_lens_sum * self.topk + bs * (i + 1)