Fix the deprecation warning for enable_flashinfer_mxfp4_moe (#9214)

This commit is contained in:
Cheng Wan
2025-08-14 23:59:35 -07:00
committed by GitHub
parent d4db9b028b
commit e3e75a786a

View File

@@ -290,6 +290,7 @@ class ServerArgs:
enable_flashinfer_cutlass_moe: bool = False
enable_flashinfer_trtllm_moe: bool = False
enable_triton_kernel_moe: bool = False
enable_flashinfer_mxfp4_moe: bool = False
def __post_init__(self):
# Check deprecated arguments
@@ -321,6 +322,11 @@ class ServerArgs:
print_deprecated_warning(
"NOTE: --enable-flashinfer-trtllm-moe is deprecated. Please set `--moe-runner-backend` to 'flashinfer_trtllm' instead."
)
if self.enable_flashinfer_mxfp4_moe:
self.moe_runner_backend = "flashinfer_mxfp4"
print_deprecated_warning(
"NOTE: --enable-flashinfer-mxfp4-moe is deprecated. Please set `--moe-runner-backend` to 'flashinfer_mxfp4' instead."
)
# Set missing default values
if self.tokenizer_path is None:
@@ -1851,11 +1857,6 @@ class ServerArgs:
action="store_true",
help="Enable returning hidden states with responses.",
)
parser.add_argument(
"--enable-flashinfer-mxfp4-moe",
action="store_true",
help="Enable FlashInfer MXFP4 MoE backend for modelopt_fp4 quant on Blackwell.",
)
parser.add_argument(
"--scheduler-recv-interval",
type=int,
@@ -2001,6 +2002,11 @@ class ServerArgs:
action="store_true",
help="(Deprecated) Use triton moe grouped gemm kernel.",
)
parser.add_argument(
"--enable-flashinfer-mxfp4-moe",
action="store_true",
help="(Deprecated) Enable FlashInfer MXFP4 MoE backend for modelopt_fp4 quant on Blackwell.",
)
@classmethod
def from_cli_args(cls, args: argparse.Namespace):