Sync cuda graph runners (#6976)

This commit is contained in:
Lianmin Zheng
2025-06-08 16:12:25 -07:00
committed by GitHub
parent 3712abfaf9
commit 0c1f03a23d
5 changed files with 56 additions and 49 deletions

View File

@@ -687,6 +687,7 @@ class EAGLEWorker(TpModelWorker):
batch.spec_info.prepare_extend_after_decode(
batch,
self.speculative_num_steps,
self.server_args.context_length,
pad_input=self.cuda_graph_runner_for_draft_extend is not None,
)
batch.spec_info.capture_hidden_mode = CaptureHiddenMode.LAST