Disable mixed chunked prefill when EAGLE speculative decoding is enabled (#6874)
This commit is contained in:
@@ -425,6 +425,12 @@ class ServerArgs:
|
||||
"Overlap scheduler is disabled because of using "
|
||||
"eagle speculative decoding."
|
||||
)
|
||||
if self.enable_mixed_chunk:
|
||||
self.enable_mixed_chunk = False
|
||||
logger.warning(
|
||||
"Mixed chunked prefill is disabled because of using "
|
||||
"eagle speculative decoding."
|
||||
)
|
||||
|
||||
model_arch = get_model_arch(self)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user