Better optimization log for gpt-oss model (#8953)

This commit is contained in:
Xiaoyu Zhang
2025-08-08 15:11:48 +08:00
committed by GitHub
parent 774b47f3f1
commit 0d1e27a0c5
2 changed files with 11 additions and 4 deletions

View File

@@ -488,8 +488,14 @@ class ServerArgs:
if is_sm100_supported() and is_mxfp4_quant_format:
self.enable_flashinfer_mxfp4_moe = True
self.enable_triton_kernel_moe = False
logger.info(
"Detected SM100 and MXFP4 quantization format for GPT-OSS model, enabling FlashInfer MXFP4 MOE kernel."
)
else:
self.enable_triton_kernel_moe = True
logger.info(
"Detected GPT-OSS model, enabling triton_kernels MOE kernel."
)
self.disable_hybrid_swa_memory = True