Set num_fused_shared_experts to num_shared_experts when shared_experts fusion is not disabled (#6736)
This commit is contained in:
@@ -207,7 +207,7 @@ class ServerArgs:
|
||||
flashinfer_mla_disable_ragged: bool = False
|
||||
warmups: Optional[str] = None
|
||||
moe_dense_tp_size: Optional[int] = None
|
||||
num_fused_shared_experts: int = 0
|
||||
disable_shared_experts_fusion: bool = False
|
||||
disable_chunked_prefix_cache: bool = False
|
||||
disable_fast_image_processor: bool = False
|
||||
mm_attention_backend: Optional[str] = None
|
||||
@@ -1384,13 +1384,10 @@ class ServerArgs:
|
||||
default=ServerArgs.deepep_config,
|
||||
help="Tuned DeepEP config suitable for your own cluster. It can be either a string with JSON content or a file path.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--num-fused-shared-experts",
|
||||
type=int,
|
||||
default=0,
|
||||
help="The number of shared_experts need to be replicated to fuse with normal experts in deepseek v3/r1, "
|
||||
"set it to tp_size can get best optimized performance. Note that for architectures with SM==90, we have enabled the shared experts fusion optimization by default for DeepSeek V3/R1, with num_fused_shared_experts automatically set to the TP size.",
|
||||
"--disable-shared-experts-fusion",
|
||||
action="store_true",
|
||||
help="Disable shared experts fusion optimization for deepseek v3/r1.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--disable-chunked-prefix-cache",
|
||||
|
||||
Reference in New Issue
Block a user