Revert "[EAGLE] Refactor code for page size > 1 & more simplifications" (#7210)
This commit is contained in:
@@ -1049,13 +1049,14 @@ class FlashInferMultiStepDraftBackend:
|
||||
kv_indices_buffer,
|
||||
self.kv_indptr,
|
||||
forward_batch.positions,
|
||||
num_seqs,
|
||||
self.topk,
|
||||
self.pool_len,
|
||||
kv_indices_buffer.shape[1],
|
||||
self.kv_indptr.shape[1],
|
||||
next_power_of_2(num_seqs),
|
||||
next_power_of_2(self.speculative_num_steps),
|
||||
next_power_of_2(bs),
|
||||
self.page_size,
|
||||
)
|
||||
|
||||
assert forward_batch.spec_info is not None
|
||||
|
||||
@@ -789,7 +789,6 @@ class FlashInferMLAMultiStepDraftBackend:
|
||||
|
||||
# Cached variables for generate_draft_decode_kv_indices
|
||||
self.pool_len = model_runner.req_to_token_pool.req_to_token.shape[1]
|
||||
self.page_size = model_runner.server_args.page_size
|
||||
|
||||
def common_template(
|
||||
self,
|
||||
@@ -810,13 +809,14 @@ class FlashInferMLAMultiStepDraftBackend:
|
||||
kv_indices_buffer,
|
||||
self.kv_indptr,
|
||||
forward_batch.positions,
|
||||
num_seqs,
|
||||
self.topk,
|
||||
self.pool_len,
|
||||
kv_indices_buffer.shape[1],
|
||||
self.kv_indptr.shape[1],
|
||||
next_power_of_2(num_seqs),
|
||||
next_power_of_2(self.speculative_num_steps),
|
||||
next_power_of_2(bs),
|
||||
self.page_size,
|
||||
)
|
||||
|
||||
assert forward_batch.spec_info is not None
|
||||
|
||||
@@ -784,13 +784,14 @@ class TritonMultiStepDraftBackend:
|
||||
kv_indices_buffer,
|
||||
self.kv_indptr,
|
||||
forward_batch.positions,
|
||||
num_seqs,
|
||||
self.topk,
|
||||
self.pool_len,
|
||||
kv_indices_buffer.shape[1],
|
||||
self.kv_indptr.shape[1],
|
||||
next_power_of_2(num_seqs),
|
||||
next_power_of_2(self.speculative_num_steps),
|
||||
next_power_of_2(bs),
|
||||
self.page_size,
|
||||
)
|
||||
|
||||
for i in range(self.speculative_num_steps):
|
||||
|
||||
Reference in New Issue
Block a user