Support custom DeepEP tuning config (#6257)
This commit is contained in:
@@ -169,6 +169,7 @@ class ServerArgs:
|
||||
enable_ep_moe: bool = False
|
||||
enable_deepep_moe: bool = False
|
||||
deepep_mode: Optional[Literal["auto", "normal", "low_latency"]] = "auto"
|
||||
deepep_config: Optional[str] = None
|
||||
enable_torch_compile: bool = False
|
||||
torch_compile_max_bs: int = 32
|
||||
cuda_graph_max_bs: Optional[int] = None
|
||||
@@ -1249,6 +1250,12 @@ class ServerArgs:
|
||||
default="auto",
|
||||
help="Select the mode when enable DeepEP MoE, could be `normal`, `low_latency` or `auto`. Default is `auto`, which means `low_latency` for decode batch and `normal` for prefill batch.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--deepep-config",
|
||||
type=str,
|
||||
default=ServerArgs.deepep_config,
|
||||
help="Tuned DeepEP config suitable for your own cluster.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--n-share-experts-fusion",
|
||||
|
||||
Reference in New Issue
Block a user