[Eagle] reduce one draft forward (#3468)

This commit is contained in:
Ying Sheng
2025-02-10 04:21:49 -08:00
committed by GitHub
parent 2d61132374
commit d23cb9a01e
2 changed files with 5 additions and 1 deletions

View File

@@ -234,6 +234,10 @@ class EAGLEWorker(TpModelWorker):
token_list.append(tree_info[1])
parents_list.append(tree_info[2])
# We don't need to run the last forward: we get 1 token from draft prefill and (#spec steps - 1) tokens here.
if i == self.speculative_num_steps - 1:
break
# Set inputs
forward_batch.input_ids = input_ids
forward_batch.out_cache_loc = out_cache_loc[