Simplify prepare_extend_after_decode (#6987)

This commit is contained in:
Lianmin Zheng
2025-06-09 16:39:21 -07:00
committed by GitHub
parent a968c888c0
commit dc0705a504
9 changed files with 140 additions and 176 deletions

View File

@@ -1575,10 +1575,9 @@ class Scheduler(
num_accepted_tokens,
can_run_cuda_graph,
) = self.draft_worker.forward_batch_speculative_generation(batch)
- self.spec_num_total_accepted_tokens += (
- num_accepted_tokens + batch.batch_size()
- )
- self.spec_num_total_forward_ct += batch.batch_size()
+ bs = batch.batch_size()
+ self.spec_num_total_accepted_tokens += num_accepted_tokens + bs
+ self.spec_num_total_forward_ct += bs
self.num_generated_tokens += num_accepted_tokens
if self.pp_group.is_last_rank: