Revert "[EAGLE] Refactor code for page size > 1 & more simplifications" (#7210)

This commit is contained in:
Lianmin Zheng
2025-06-15 02:48:00 -07:00
committed by GitHub
parent 5f1ab32717
commit fff10809bf
7 changed files with 150 additions and 647 deletions

View File

@@ -1049,13 +1049,14 @@ class FlashInferMultiStepDraftBackend:
kv_indices_buffer,
self.kv_indptr,
forward_batch.positions,
num_seqs,
self.topk,
self.pool_len,
kv_indices_buffer.shape[1],
self.kv_indptr.shape[1],
next_power_of_2(num_seqs),
next_power_of_2(self.speculative_num_steps),
next_power_of_2(bs),
self.page_size,
)
assert forward_batch.spec_info is not None

View File

@@ -789,7 +789,6 @@ class FlashInferMLAMultiStepDraftBackend:
# Cached variables for generate_draft_decode_kv_indices
self.pool_len = model_runner.req_to_token_pool.req_to_token.shape[1]
self.page_size = model_runner.server_args.page_size
def common_template(
self,
@@ -810,13 +809,14 @@ class FlashInferMLAMultiStepDraftBackend:
kv_indices_buffer,
self.kv_indptr,
forward_batch.positions,
num_seqs,
self.topk,
self.pool_len,
kv_indices_buffer.shape[1],
self.kv_indptr.shape[1],
next_power_of_2(num_seqs),
next_power_of_2(self.speculative_num_steps),
next_power_of_2(bs),
self.page_size,
)
assert forward_batch.spec_info is not None

View File

@@ -784,13 +784,14 @@ class TritonMultiStepDraftBackend:
kv_indices_buffer,
self.kv_indptr,
forward_batch.positions,
num_seqs,
self.topk,
self.pool_len,
kv_indices_buffer.shape[1],
self.kv_indptr.shape[1],
next_power_of_2(num_seqs),
next_power_of_2(self.speculative_num_steps),
next_power_of_2(bs),
self.page_size,
)
for i in range(self.speculative_num_steps):