[5/N] MoE Refactor: Update MoE parallelism arguments (#8658)

This commit is contained in:
Cheng Wan
2025-08-01 01:20:03 -07:00
committed by GitHub
parent c8d3a402c1
commit 6c88f6c8d9
38 changed files with 342 additions and 299 deletions

View File

@@ -499,7 +499,6 @@ class SRTRunner:
chunked_prefill_size: Optional[int] = None,
dp_size: int = 1,
tokenizer_path: Optional[str] = None,
-enable_ep_moe: bool = False,
mem_fraction_static: float = 0.65,
trust_remote_code: bool = False,
speculative_draft_model_path: Optional[str] = None,
@@ -550,7 +549,6 @@ class SRTRunner:
enable_dp_attention=enable_dp_attention,
dp_size=dp_size,
tokenizer_path=tokenizer_path,
-enable_ep_moe=enable_ep_moe,
disable_overlap_schedule=disable_overlap_schedule,
cuda_graph_max_bs=cuda_graph_max_bs,
disable_custom_all_reduce=disable_custom_all_reduce,