Fix FP4 MoE accuracy from missing routed_scaling_factor (#8333)
@@ -433,10 +433,6 @@ class ServerArgs:
                 self.quantization == "modelopt_fp4"
             ), "modelopt_fp4 quantization is required for Flashinfer MOE"
             os.environ["TRTLLM_ENABLE_PDL"] = "1"
-            self.disable_shared_experts_fusion = True
-            logger.warning(
-                f"Flashinfer MoE is enabled. Shared expert fusion is disabled."
-            )
 
         # DeepEP MoE
         if self.enable_deepep_moe:
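For context, the accuracy bug in the title comes from the routed_scaling_factor that DeepSeek-style MoE models apply to the routed experts' combined output. Below is a minimal sketch of where that factor enters, assuming normalized top-k router weights; the function name, shapes, and arguments are illustrative, not the actual SGLang/FlashInfer kernel API:

import torch

def combine_expert_outputs(
    expert_outputs: torch.Tensor,   # [num_tokens, top_k, hidden]
    topk_weights: torch.Tensor,     # [num_tokens, top_k], normalized router weights
    shared_output: torch.Tensor,    # [num_tokens, hidden], shared-expert branch
    routed_scaling_factor: float,   # model config constant, e.g. 2.5 for DeepSeek-V3
) -> torch.Tensor:
    # Illustrative sketch, not the fused kernel: the routed experts'
    # weighted sum is scaled by routed_scaling_factor before being
    # added to the shared-expert output. A kernel path that drops this
    # multiplication is wrong by a constant factor on the routed
    # branch, which surfaces as an accuracy regression, not a crash.
    routed = torch.einsum("tkh,tk->th", expert_outputs, topk_weights)
    return shared_output + routed_scaling_factor * routed

The deleted lines in the hunk above suggest that forcibly disabling shared-expert fusion had been a workaround for the mis-scaled output, which this fix makes unnecessary.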