diff --git a/python/sglang/srt/layers/quantization/modelopt_quant.py b/python/sglang/srt/layers/quantization/modelopt_quant.py
index de72d019a..89ecd44f5 100755
--- a/python/sglang/srt/layers/quantization/modelopt_quant.py
+++ b/python/sglang/srt/layers/quantization/modelopt_quant.py
@@ -642,10 +642,22 @@ class ModelOptFp4Config(QuantizationConfig):
     def is_layer_excluded(self, prefix: str, exclude_modules: list):
         import regex as re
 
+        fused_patterns = ["q_a_proj", "q_b_proj", "kv_a_proj_with_mqa", "kv_b_proj"]
+        prefix_split = prefix.split(".")
         for pattern in exclude_modules:
             regex_str = pattern.replace(".", r"\.").replace("*", r".*")
+            pattern_split = pattern.split(".")
             if re.fullmatch(regex_str, prefix):
                 return True
+            elif (
+                pattern_split[-1] in fused_patterns
+                and pattern_split[-1] in prefix_split[-1]
+            ):
+                # Check if the last part of the excluded pattern is contained in the last part of the prefix
+                # This handles fused modules like fused_qkv_a_proj_with_mqa that contain q_a_proj and kv_a_proj_with_mqa
+                # e.g., model.layers.{i}.self_attn.{fused_weight_name}
+                assert len(prefix_split) == 5 and len(pattern_split) == 5
+                return True
         return False
 
     def get_quant_method(
@@ -1250,8 +1262,6 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
                 layer.w13_weight_scale,
             )
 
-            logger.info_once("Applied flashinfer weight processing for both w13 and w2")
-
         else:
             # CUTLASS processing - handle w13 and w2 separately
 
@@ -1268,7 +1278,6 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
             layer.w2_weight = Parameter(layer.w2_weight.data, requires_grad=False)
 
         # Both flashinfer cutlass and regular cutlass use same processing for w2
-        logger.info_once("Applied weight processing for both w13 and w2")
 
         # Set up CUTLASS MoE parameters
         device = layer.w13_weight.device
diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
index 7cd48da0e..5dfce426e 100644
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -654,11 +654,13 @@ class ServerArgs:
         ], "The expert parallel size must be 1 or the same as the tensor parallel size"
 
         if self.moe_runner_backend == "flashinfer_trtllm":
-            if not self.disable_shared_experts_fusion:
-                self.disable_shared_experts_fusion = True
-                logger.warning(
-                    "FlashInfer TRTLLM MoE is enabled. --disable-shared-experts-fusion is automatically set."
-                )
+            assert (
+                self.quantization == "modelopt_fp4" or self.quantization == "fp8"
+            ), "modelopt_fp4 or fp8 quantization is required for Flashinfer TRTLLM MoE"
+            self.disable_shared_experts_fusion = True
+            logger.warning(
+                "FlashInfer TRTLLM MoE is enabled. --disable-shared-experts-fusion is automatically set."
+            )
 
         # DeepEP MoE
         if self.moe_a2a_backend == "deepep":
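For reference, below is a minimal standalone sketch of the fused-module exclusion matching introduced in `is_layer_excluded` above. It mirrors the logic from the diff, but swaps the `regex` package for the standard-library `re` module and omits the PR's depth assertion on the prefix; the example prefixes and exclude patterns are hypothetical, chosen only to exercise the two match paths (wildcard regex match vs. fused-module substring match).

```python
import re

# Sub-module names that may be merged into a fused weight (mirrors the diff above).
FUSED_PATTERNS = ["q_a_proj", "q_b_proj", "kv_a_proj_with_mqa", "kv_b_proj"]


def is_layer_excluded(prefix: str, exclude_modules: list) -> bool:
    """Sketch of the exclusion check; stdlib `re` stands in for the `regex` package."""
    prefix_split = prefix.split(".")
    for pattern in exclude_modules:
        regex_str = pattern.replace(".", r"\.").replace("*", r".*")
        pattern_split = pattern.split(".")
        if re.fullmatch(regex_str, prefix):
            # Exact wildcard match, e.g. "model.layers.*.self_attn.q_a_proj".
            return True
        elif (
            pattern_split[-1] in FUSED_PATTERNS
            and pattern_split[-1] in prefix_split[-1]
        ):
            # Fused weights such as fused_qkv_a_proj_with_mqa contain q_a_proj and
            # kv_a_proj_with_mqa, so excluding either component also excludes the
            # fused module it was merged into.
            return True
    return False


# Hypothetical layer prefixes and exclude patterns, for illustration only.
print(is_layer_excluded(
    "model.layers.0.self_attn.q_a_proj",
    ["model.layers.*.self_attn.q_a_proj"],
))  # True  (regex match)
print(is_layer_excluded(
    "model.layers.0.self_attn.fused_qkv_a_proj_with_mqa",
    ["model.layers.*.self_attn.kv_a_proj_with_mqa"],
))  # True  (fused-module match)
print(is_layer_excluded(
    "model.layers.0.mlp.gate_proj",
    ["model.layers.*.self_attn.q_a_proj"],
))  # False
```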