[PP] Fix init_memory_pool desync & add PP for mixtral (#6223)

This commit is contained in:
Ying Sheng
2025-05-12 12:38:09 -07:00
committed by GitHub
parent 12319a6787
commit bad7c26fdc
8 changed files with 179 additions and 47 deletions

View File

@@ -347,6 +347,12 @@ class ServerArgs:
f"DeepEP MoE is enabled. The expert parallel size is adjusted to be the same as the tensor parallel size[{self.tp_size}]."
)
if self.pp_size > 1:
self.disable_overlap_schedule = True
logger.warning(
"Pipeline parallelism is incompatible with overlap schedule."
)
# Speculative Decoding
if self.speculative_algorithm == "NEXTN":
# NEXTN shares the same implementation as EAGLE