Fix the deprecation warning for enable_flashinfer_mxfp4_moe (#9214)
This commit is contained in:
@@ -290,6 +290,7 @@ class ServerArgs:
|
|||||||
enable_flashinfer_cutlass_moe: bool = False
|
enable_flashinfer_cutlass_moe: bool = False
|
||||||
enable_flashinfer_trtllm_moe: bool = False
|
enable_flashinfer_trtllm_moe: bool = False
|
||||||
enable_triton_kernel_moe: bool = False
|
enable_triton_kernel_moe: bool = False
|
||||||
|
enable_flashinfer_mxfp4_moe: bool = False
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
# Check deprecated arguments
|
# Check deprecated arguments
|
||||||
@@ -321,6 +322,11 @@ class ServerArgs:
|
|||||||
print_deprecated_warning(
|
print_deprecated_warning(
|
||||||
"NOTE: --enable-flashinfer-trtllm-moe is deprecated. Please set `--moe-runner-backend` to 'flashinfer_trtllm' instead."
|
"NOTE: --enable-flashinfer-trtllm-moe is deprecated. Please set `--moe-runner-backend` to 'flashinfer_trtllm' instead."
|
||||||
)
|
)
|
||||||
|
if self.enable_flashinfer_mxfp4_moe:
|
||||||
|
self.moe_runner_backend = "flashinfer_mxfp4"
|
||||||
|
print_deprecated_warning(
|
||||||
|
"NOTE: --enable-flashinfer-mxfp4-moe is deprecated. Please set `--moe-runner-backend` to 'flashinfer_mxfp4' instead."
|
||||||
|
)
|
||||||
|
|
||||||
# Set missing default values
|
# Set missing default values
|
||||||
if self.tokenizer_path is None:
|
if self.tokenizer_path is None:
|
||||||
@@ -1851,11 +1857,6 @@ class ServerArgs:
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="Enable returning hidden states with responses.",
|
help="Enable returning hidden states with responses.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"--enable-flashinfer-mxfp4-moe",
|
|
||||||
action="store_true",
|
|
||||||
help="Enable FlashInfer MXFP4 MoE backend for modelopt_fp4 quant on Blackwell.",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--scheduler-recv-interval",
|
"--scheduler-recv-interval",
|
||||||
type=int,
|
type=int,
|
||||||
@@ -2001,6 +2002,11 @@ class ServerArgs:
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="(Deprecated) Use triton moe grouped gemm kernel.",
|
help="(Deprecated) Use triton moe grouped gemm kernel.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--enable-flashinfer-mxfp4-moe",
|
||||||
|
action="store_true",
|
||||||
|
help="(Deprecated) Enable FlashInfer MXFP4 MoE backend for modelopt_fp4 quant on Blackwell.",
|
||||||
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_cli_args(cls, args: argparse.Namespace):
|
def from_cli_args(cls, args: argparse.Namespace):
|
||||||
|
|||||||
Reference in New Issue
Block a user