Tiny cleanup of some unused EAGLE code (#11660)
This commit is contained in:
@@ -96,7 +96,6 @@ class DraftBackendFactory:
|
|||||||
FlashInferMultiStepDraftBackend,
|
FlashInferMultiStepDraftBackend,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.has_prefill_wrapper_verify = True
|
|
||||||
return FlashInferMultiStepDraftBackend(
|
return FlashInferMultiStepDraftBackend(
|
||||||
self.draft_model_runner, self.topk, self.speculative_num_steps
|
self.draft_model_runner, self.topk, self.speculative_num_steps
|
||||||
)
|
)
|
||||||
@@ -105,7 +104,6 @@ class DraftBackendFactory:
|
|||||||
FlashInferMLAMultiStepDraftBackend,
|
FlashInferMLAMultiStepDraftBackend,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.has_prefill_wrapper_verify = True
|
|
||||||
return FlashInferMLAMultiStepDraftBackend(
|
return FlashInferMLAMultiStepDraftBackend(
|
||||||
self.draft_model_runner, self.topk, self.speculative_num_steps
|
self.draft_model_runner, self.topk, self.speculative_num_steps
|
||||||
)
|
)
|
||||||
@@ -149,7 +147,6 @@ class DraftBackendFactory:
|
|||||||
TRTLLMHAAttnMultiStepDraftBackend,
|
TRTLLMHAAttnMultiStepDraftBackend,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.has_prefill_wrapper_verify = True
|
|
||||||
return TRTLLMHAAttnMultiStepDraftBackend(
|
return TRTLLMHAAttnMultiStepDraftBackend(
|
||||||
self.draft_model_runner, self.topk, self.speculative_num_steps
|
self.draft_model_runner, self.topk, self.speculative_num_steps
|
||||||
)
|
)
|
||||||
@@ -164,7 +161,6 @@ class DraftBackendFactory:
|
|||||||
TRTLLMMLAMultiStepDraftBackend,
|
TRTLLMMLAMultiStepDraftBackend,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.has_prefill_wrapper_verify = True
|
|
||||||
return TRTLLMMLAMultiStepDraftBackend(
|
return TRTLLMMLAMultiStepDraftBackend(
|
||||||
self.draft_model_runner, self.topk, self.speculative_num_steps
|
self.draft_model_runner, self.topk, self.speculative_num_steps
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -41,7 +41,6 @@ class EAGLEDraftCudaGraphRunner:
|
|||||||
# Parse args
|
# Parse args
|
||||||
self.eagle_worker = eagle_worker
|
self.eagle_worker = eagle_worker
|
||||||
self.model_runner = model_runner = eagle_worker.model_runner
|
self.model_runner = model_runner = eagle_worker.model_runner
|
||||||
self.model_runner: EAGLEWorker
|
|
||||||
self.graphs = {}
|
self.graphs = {}
|
||||||
self.output_buffers = {}
|
self.output_buffers = {}
|
||||||
self.enable_torch_compile = model_runner.server_args.enable_torch_compile
|
self.enable_torch_compile = model_runner.server_args.enable_torch_compile
|
||||||
|
|||||||
@@ -192,10 +192,6 @@ class EAGLEWorker(TpModelWorker):
|
|||||||
|
|
||||||
def init_attention_backend(self):
|
def init_attention_backend(self):
|
||||||
# Create multi-step attn backends and cuda graph runners
|
# Create multi-step attn backends and cuda graph runners
|
||||||
|
|
||||||
self.has_prefill_wrapper_verify = False
|
|
||||||
self.draft_extend_attn_backend = None
|
|
||||||
|
|
||||||
draft_backend_factory = DraftBackendFactory(
|
draft_backend_factory = DraftBackendFactory(
|
||||||
self.server_args,
|
self.server_args,
|
||||||
self.draft_model_runner,
|
self.draft_model_runner,
|
||||||
|
|||||||
Reference in New Issue
Block a user