Update DeepSeek-R1-FP4 default config on blackwell (#11512)
@@ -802,7 +802,32 @@ class ServerArgs:
         hf_config = self.get_hf_config()
         model_arch = hf_config.architectures[0]
-        if model_arch in ["GptOssForCausalLM"]:
+        if model_arch in ["DeepseekV3ForCausalLM"]:
+            if is_cuda() and is_sm100_supported():
+                if (
+                    self.attention_backend is None
+                    and self.prefill_attention_backend is None
+                    and self.decode_attention_backend is None
+                ):
+                    self.attention_backend = "trtllm_mla"
+                    logger.info(
+                        "Use trtllm_mla as attention backend on sm100 for DeepseekV3ForCausalLM"
+                    )
+                if not self.enable_dp_attention:
+                    self.enable_flashinfer_allreduce_fusion = True
+                    logger.info(
+                        "Enable FlashInfer AllReduce Fusion on sm100 for DeepseekV3ForCausalLM"
+                    )
+                if (
+                    self.quantization == "modelopt_fp4"
+                    and self.moe_runner_backend == "auto"
+                ):
+                    self.moe_runner_backend = "flashinfer_trtllm"
+                    logger.info(
+                        "Use flashinfer_trtllm as moe runner backend on sm100 for DeepseekV3ForCausalLM"
+                    )
+
+        elif model_arch in ["GptOssForCausalLM"]:
             if (
                 self.attention_backend is None
                 and self.prefill_attention_backend is None
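For reference, a minimal standalone sketch of the default-selection logic in this hunk. The Args dataclass and apply_sm100_deepseek_defaults helper below are illustrative names, not part of the sglang ServerArgs API, and the real branch only applies when the model architecture is DeepseekV3ForCausalLM and is_cuda() and is_sm100_supported() both hold.

    # Sketch only: mirrors the decision flow of the new SM100 / DeepseekV3 branch.
    # Names here (Args, apply_sm100_deepseek_defaults) are hypothetical, not sglang's.
    from dataclasses import dataclass
    from typing import Optional


    @dataclass
    class Args:
        attention_backend: Optional[str] = None
        prefill_attention_backend: Optional[str] = None
        decode_attention_backend: Optional[str] = None
        enable_dp_attention: bool = False
        enable_flashinfer_allreduce_fusion: bool = False
        quantization: Optional[str] = None
        moe_runner_backend: str = "auto"


    def apply_sm100_deepseek_defaults(args: Args) -> Args:
        # Pick trtllm_mla only if the user has not chosen any attention backend.
        if (
            args.attention_backend is None
            and args.prefill_attention_backend is None
            and args.decode_attention_backend is None
        ):
            args.attention_backend = "trtllm_mla"
        # AllReduce fusion is enabled only when DP attention is off.
        if not args.enable_dp_attention:
            args.enable_flashinfer_allreduce_fusion = True
        # FP4 (modelopt_fp4) checkpoints default to the flashinfer_trtllm MoE runner.
        if args.quantization == "modelopt_fp4" and args.moe_runner_backend == "auto":
            args.moe_runner_backend = "flashinfer_trtllm"
        return args


    if __name__ == "__main__":
        # Example: an FP4 checkpoint with no backend overrides picks all three defaults.
        print(apply_sm100_deepseek_defaults(Args(quantization="modelopt_fp4")))

The design choice visible in the diff is that each default is guarded independently: an explicit user setting (any attention backend flag, dp attention, or a non-"auto" MoE runner) suppresses only the corresponding override rather than the whole branch.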