[5/N] MoE Refactor: Update MoE parallelism arguments (#8658)
This commit is contained in:
@@ -499,7 +499,6 @@ class SRTRunner:
|
||||
chunked_prefill_size: Optional[int] = None,
|
||||
dp_size: int = 1,
|
||||
tokenizer_path: Optional[str] = None,
|
||||
enable_ep_moe: bool = False,
|
||||
mem_fraction_static: float = 0.65,
|
||||
trust_remote_code: bool = False,
|
||||
speculative_draft_model_path: Optional[str] = None,
|
||||
@@ -550,7 +549,6 @@ class SRTRunner:
|
||||
enable_dp_attention=enable_dp_attention,
|
||||
dp_size=dp_size,
|
||||
tokenizer_path=tokenizer_path,
|
||||
enable_ep_moe=enable_ep_moe,
|
||||
disable_overlap_schedule=disable_overlap_schedule,
|
||||
cuda_graph_max_bs=cuda_graph_max_bs,
|
||||
disable_custom_all_reduce=disable_custom_all_reduce,
|
||||
|
||||
Reference in New Issue
Block a user