Misc fixes for eagle (flush_cache, CPU overhead) (#3014)
This commit is contained in:
@@ -1023,7 +1023,7 @@ class Scheduler:
|
||||
)
|
||||
|
||||
# Check for jump-forward
|
||||
-if not self.disable_jump_forward:
|
||||
+if not self.disable_jump_forward and batch.has_grammar:
|
||||
jump_forward_reqs = batch.check_for_jump_forward(self.pad_input_ids_func)
|
||||
self.waiting_queue.extend(jump_forward_reqs)
|
||||
if batch.is_empty():
|
||||
@@ -1564,6 +1564,15 @@ class Scheduler:
|
||||
self.grammar_backend.reset()
|
||||
self.req_to_token_pool.clear()
|
||||
self.token_to_kv_pool.clear()
|
||||
|
||||
+if not self.spec_algorithm.is_none():
|
||||
+    self.draft_worker.model_runner.req_to_token_pool.clear()
|
||||
+    self.draft_worker.model_runner.token_to_kv_pool.clear()
|
||||
|
||||
+self.num_generated_tokens = 0
|
||||
+self.forward_ct_decode = 0
|
||||
+self.spec_num_total_accepted_tokens = 0
|
||||
+self.spec_num_total_forward_ct = 0
|
||||
torch.cuda.empty_cache()
|
||||
logger.info("Cache flushed successfully!")
|
||||
if_success = True
|
||||
|
||||
Reference in New Issue
Block a user