From 704160017dbcdc173c08ef23723fd70de9133aee Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Tue, 21 Oct 2025 17:19:57 -0700 Subject: [PATCH] fix: resolve flashinfer 0.4.1 import (#11940) --- python/sglang/srt/layers/quantization/modelopt_quant.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/sglang/srt/layers/quantization/modelopt_quant.py b/python/sglang/srt/layers/quantization/modelopt_quant.py index 627e991c4..057f32a96 100755 --- a/python/sglang/srt/layers/quantization/modelopt_quant.py +++ b/python/sglang/srt/layers/quantization/modelopt_quant.py @@ -1061,8 +1061,8 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase): ): from flashinfer import nvfp4_block_scale_interleave from flashinfer.fused_moe.core import ( - _maybe_get_cached_w2_permute_indices, _maybe_get_cached_w3_w1_permute_indices, + get_w2_permute_indices_with_cache, ) """Prepare quantized weights for kernel (done offline with weights).""" @@ -1123,7 +1123,7 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase): ) ) - permute_indices = _maybe_get_cached_w2_permute_indices( + permute_indices = get_w2_permute_indices_with_cache( self._cache_permute_indices, gemm2_weights_fp4[i].view(torch.uint8), epilogue_tile_m, @@ -1134,7 +1134,7 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase): .contiguous() ) - permute_sf_indices = _maybe_get_cached_w2_permute_indices( + permute_sf_indices = get_w2_permute_indices_with_cache( self._cache_permute_indices, gemm2_scales_linear_fp4[i].view(torch.uint8), epilogue_tile_m,