Disabling mixed chunked prefill when eagle is enabled (#6874)

This commit is contained in:
Swipe4057
2025-06-07 13:06:58 +03:00
committed by GitHub
parent f1114e7ff3
commit e1ce44cdb1

View File

@@ -425,6 +425,12 @@ class ServerArgs:
"Overlap scheduler is disabled because of using "
"eagle speculative decoding."
)
if self.enable_mixed_chunk:
self.enable_mixed_chunk = False
logger.warning(
"Mixed chunked prefill is disabled because of using "
"eagle speculative decoding."
)
model_arch = get_model_arch(self)