Sync cuda graph runners (#6976)
This commit is contained in:
@@ -687,6 +687,7 @@ class EAGLEWorker(TpModelWorker):
|
||||
batch.spec_info.prepare_extend_after_decode(
|
||||
batch,
|
||||
self.speculative_num_steps,
|
||||
self.server_args.context_length,
|
||||
pad_input=self.cuda_graph_runner_for_draft_extend is not None,
|
||||
)
|
||||
batch.spec_info.capture_hidden_mode = CaptureHiddenMode.LAST
|
||||
|
||||
Reference in New Issue
Block a user