Refine some typos (#3473)
This commit is contained in:
@@ -30,7 +30,7 @@ def get_model_config(model_name: str, tp_size: int):
|
||||
topk = config.num_experts_per_tok
|
||||
intermediate_size = config.moe_intermediate_size
|
||||
shard_intermediate_size = 2 * intermediate_size // tp_size
|
||||
elif config.architectures[0] == "DeepseekV2ForCausalLM":
|
||||
elif config.architectures[0] in ["DeepseekV2ForCausalLM", "DeepseekV3ForCausalLM"]:
|
||||
E = config.n_routed_experts
|
||||
topk = config.num_experts_per_tok
|
||||
intermediate_size = config.intermediate_size
|
||||
|
||||
@@ -1094,7 +1094,7 @@ def fused_moe(
|
||||
- num_expert_group: Optional[int]: additional parameter for grouped_topk
|
||||
- topk_group: Optional[int]: additional parameter for grouped_topk
|
||||
- use_grouped_topk: If True, use grouped_topk instead of fused_topk
|
||||
note: Deepseekv2 model uses grouped_topk
|
||||
note: Deepseek V2/V3/R1 series models use grouped_topk
|
||||
- use_fp8_w8a8 (bool): If True, use fp8 arithmetic to compute the inner
|
||||
products for w1 and w2. Defaults to False.
|
||||
- use_int8_w8a16 (bool): If True, use int8 weights with 16-bit activations to compute the inner
|
||||
|
||||
@@ -75,7 +75,7 @@ def fused_topk(
|
||||
return topk_weights, topk_ids
|
||||
|
||||
|
||||
# This is used by the Deepseek-V2 model
|
||||
# This is used by the Deepseek V2/V3/R1 series models
|
||||
@torch.compile(dynamic=True, backend=get_compiler_backend())
|
||||
def grouped_topk(
|
||||
hidden_states: torch.Tensor,
|
||||
|
||||
@@ -795,7 +795,7 @@ class ServerArgs:
|
||||
parser.add_argument(
|
||||
"--disable-mla",
|
||||
action="store_true",
|
||||
help="Disable Multi-head Latent Attention (MLA) for DeepSeek-V2.",
|
||||
help="Disable Multi-head Latent Attention (MLA) for DeepSeek V2/V3/R1 series models.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--disable-overlap-schedule",
|
||||
|
||||
Reference in New Issue
Block a user