From 704160017dbcdc173c08ef23723fd70de9133aee Mon Sep 17 00:00:00 2001
From: Yineng Zhang <me@zhyncs.com>
Date: Tue, 21 Oct 2025 17:19:57 -0700
Subject: [PATCH] fix: resolve flashinfer 0.4.1 import (#11940)

---
 python/sglang/srt/layers/quantization/modelopt_quant.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/sglang/srt/layers/quantization/modelopt_quant.py b/python/sglang/srt/layers/quantization/modelopt_quant.py
index 627e991c4..057f32a96 100755
--- a/python/sglang/srt/layers/quantization/modelopt_quant.py
+++ b/python/sglang/srt/layers/quantization/modelopt_quant.py
@@ -1061,8 +1061,8 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
     ):
         from flashinfer import nvfp4_block_scale_interleave
         from flashinfer.fused_moe.core import (
-            _maybe_get_cached_w2_permute_indices,
             _maybe_get_cached_w3_w1_permute_indices,
+            get_w2_permute_indices_with_cache,
         )
 
         """Prepare quantized weights for kernel (done offline with weights)."""
@@ -1123,7 +1123,7 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
                 )
             )
 
-            permute_indices = _maybe_get_cached_w2_permute_indices(
+            permute_indices = get_w2_permute_indices_with_cache(
                 self._cache_permute_indices,
                 gemm2_weights_fp4[i].view(torch.uint8),
                 epilogue_tile_m,
@@ -1134,7 +1134,7 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
                 .contiguous()
             )
 
-            permute_sf_indices = _maybe_get_cached_w2_permute_indices(
+            permute_sf_indices = get_w2_permute_indices_with_cache(
                 self._cache_permute_indices,
                 gemm2_scales_linear_fp4[i].view(torch.uint8),
                 epilogue_tile_m,