[NVIDIA] [3/N] Nvfp4 Masked Gemm: Add flashinfer grouped_gemm_nt_masked (#9199)
This commit is contained in:
@@ -399,6 +399,7 @@ class ServerArgs:
|
||||
enable_ep_moe: bool = False
|
||||
enable_deepep_moe: bool = False
|
||||
enable_flashinfer_cutlass_moe: bool = False
|
||||
enable_flashinfer_cutedsl_moe: bool = False
|
||||
enable_flashinfer_trtllm_moe: bool = False
|
||||
enable_triton_kernel_moe: bool = False
|
||||
enable_flashinfer_mxfp4_moe: bool = False
|
||||
@@ -420,6 +421,11 @@ class ServerArgs:
|
||||
print_deprecated_warning(
|
||||
"NOTE: --enable-triton-kernel-moe is deprecated. Please set `--moe-runner-backend` to 'triton_kernel' instead."
|
||||
)
|
||||
if self.enable_flashinfer_cutedsl_moe:
|
||||
self.moe_runner_backend = "flashinfer_cutedsl"
|
||||
print_deprecated_warning(
|
||||
"NOTE: --enable-flashinfer-cutedsl-moe is deprecated. Please set `--moe-runner-backend` to 'flashinfer_cutedsl' instead."
|
||||
)
|
||||
if self.enable_flashinfer_cutlass_moe:
|
||||
self.moe_runner_backend = "flashinfer_cutlass"
|
||||
print_deprecated_warning(
|
||||
@@ -1622,6 +1628,7 @@ class ServerArgs:
|
||||
"flashinfer_trtllm",
|
||||
"flashinfer_cutlass",
|
||||
"flashinfer_mxfp4",
|
||||
"flashinfer_cutedsl",
|
||||
],
|
||||
default=ServerArgs.moe_runner_backend,
|
||||
help="Choose the runner backend for MoE.",
|
||||
@@ -2204,6 +2211,11 @@ class ServerArgs:
|
||||
action="store_true",
|
||||
help="(Deprecated) Enable FlashInfer CUTLASS MoE backend for modelopt_fp4 quant on Blackwell. Supports MoE-EP",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable-flashinfer-cutedsl-moe",
|
||||
action="store_true",
|
||||
help="(Deprecated) Enable FlashInfer CuteDSL MoE backend for modelopt_fp4 quant on Blackwell. Supports MoE-EP",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable-flashinfer-trtllm-moe",
|
||||
action="store_true",
|
||||
|
||||
Reference in New Issue
Block a user