Clean up server_args, triton cache manager (#8332)

This commit is contained in:
Lianmin Zheng
2025-07-25 14:14:51 -07:00
committed by GitHub
parent f8260f2539
commit ed2e313eb6
12 changed files with 128 additions and 204 deletions

View File

@@ -336,7 +336,6 @@ class EAGLEDraftCudaGraphRunner:
forward_batch.req_pool_indices = self.req_pool_indices[:bs]
forward_batch.positions = self.positions[:num_tokens]
# Special handling for seq_lens_cpu, which is used when flashinfer MLA is in use
if forward_batch.seq_lens_cpu is not None:
if bs != raw_bs:
self.seq_lens_cpu.fill_(self.seq_len_fill_value)