Fix some typos (#3473)

This commit is contained in:
Xiaoyu Zhang
2025-02-10 23:35:44 +08:00
committed by GitHub
parent 4fe92bfca5
commit 2f47d710ae
4 changed files with 4 additions and 4 deletions

View File

@@ -1094,7 +1094,7 @@ def fused_moe(
- num_expert_group: Optional[int]: additional parameter for grouped_topk
- topk_group: Optional[int]: additional parameter for grouped_topk
- use_grouped_topk: If True, use grouped_topk instead of fused_topk
note: Deepseekv2 model uses grouped_topk
note: Deepseek V2/V3/R1 series models use grouped_topk
- use_fp8_w8a8 (bool): If True, use fp8 arithmetic to compute the inner
products for w1 and w2. Defaults to False.
- use_int8_w8a16 (bool): If True, use int8 arithmetic to compute the inner

View File

@@ -75,7 +75,7 @@ def fused_topk(
return topk_weights, topk_ids
# This is used by the Deepseek-V2 model
# This is used by the Deepseek V2/V3/R1 series models
@torch.compile(dynamic=True, backend=get_compiler_backend())
def grouped_topk(
hidden_states: torch.Tensor,

View File

@@ -795,7 +795,7 @@ class ServerArgs:
parser.add_argument(
"--disable-mla",
action="store_true",
help="Disable Multi-head Latent Attention (MLA) for DeepSeek-V2.",
help="Disable Multi-head Latent Attention (MLA) for DeepSeek V2/V3/R1 series models.",
)
parser.add_argument(
"--disable-overlap-schedule",