Misc fixes for eagle (flush_cache, CPU overhead) (#3014)

This commit is contained in:
Lianmin Zheng
2025-01-20 20:25:13 -08:00
parent d2571dd5c7
commit 287d07a669
11 changed files with 133 additions and 96 deletions

View File

@@ -1023,7 +1023,7 @@ class Scheduler:
)
# Check for jump-forward
if not self.disable_jump_forward:
if not self.disable_jump_forward and batch.has_grammar:
jump_forward_reqs = batch.check_for_jump_forward(self.pad_input_ids_func)
self.waiting_queue.extend(jump_forward_reqs)
if batch.is_empty():
@@ -1564,6 +1564,15 @@ class Scheduler:
self.grammar_backend.reset()
self.req_to_token_pool.clear()
self.token_to_kv_pool.clear()
if not self.spec_algorithm.is_none():
self.draft_worker.model_runner.req_to_token_pool.clear()
self.draft_worker.model_runner.token_to_kv_pool.clear()
self.num_generated_tokens = 0
self.forward_ct_decode = 0
self.spec_num_total_accepted_tokens = 0
self.spec_num_total_forward_ct = 0
torch.cuda.empty_cache()
logger.info("Cache flushed successfully!")
if_success = True