From 3ae33fcd0a6a68feac788fcf2db3a088c526e657 Mon Sep 17 00:00:00 2001 From: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com> Date: Fri, 8 Aug 2025 01:02:40 +0800 Subject: [PATCH] Fix hopper launch gpt-oss model illegal memory (#8908) --- .../sglang/srt/layers/quantization/mxfp4.py | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/python/sglang/srt/layers/quantization/mxfp4.py b/python/sglang/srt/layers/quantization/mxfp4.py index 1d5f54deb..dc9a208fd 100644 --- a/python/sglang/srt/layers/quantization/mxfp4.py +++ b/python/sglang/srt/layers/quantization/mxfp4.py @@ -16,6 +16,7 @@ from sglang.srt.layers.quantization.base_config import ( QuantizeMethodBase, ) from sglang.srt.layers.quantization.utils import is_layer_skipped +from sglang.srt.layers.utils import is_sm100_supported from sglang.srt.managers.schedule_batch import global_server_args_dict from sglang.srt.utils import ( direct_register_custom_op, @@ -28,6 +29,7 @@ from sglang.srt.utils import ( set_weight_attrs, ) +_is_sm100_supported = is_cuda() and is_sm100_supported() has_triton_kernels = importlib.util.find_spec("triton_kernels") is not None @@ -244,13 +246,17 @@ class Mxfp4MoEMethod(FusedMoEMethodBase): # pad the intermediate size to be a multiple of 2 * mxfp4_block # for to hold non-uniform sharded tensor as well as swizzling - if self.use_flashinfer: - intermediate_size_per_partition_after_pad = round_up(intermediate_size, 256) - hidden_size = round_up(hidden_size, 256) - elif is_hip(): - intermediate_size_per_partition_after_pad = round_up(intermediate_size, 128) - else: - intermediate_size_per_partition_after_pad = round_up(intermediate_size, 64) + intermediate_size_per_partition_after_pad = intermediate_size + if _is_sm100_supported: + if self.use_flashinfer: + intermediate_size_per_partition_after_pad = round_up( + intermediate_size, 256 + ) + hidden_size = round_up(hidden_size, 256) + else: + intermediate_size_per_partition_after_pad = round_up( + intermediate_size, 64 + ) self.intermediate_size = intermediate_size_per_partition_after_pad