Disabling mixed chunked prefill when eagle is enabled (#6874)
This commit is contained in:
@@ -425,6 +425,12 @@ class ServerArgs:
                 "Overlap scheduler is disabled because of using "
                 "eagle speculative decoding."
             )
+            if self.enable_mixed_chunk:
+                self.enable_mixed_chunk = False
+                logger.warning(
+                    "Mixed chunked prefill is disabled because of using "
+                    "eagle speculative decoding."
+                )
 
             model_arch = get_model_arch(self)
 
Reference in New Issue
Block a user