Update DeepSeek-R1-FP4 default config on blackwell (#11512)

This commit is contained in:
Qiaolin Yu
2025-10-12 20:32:11 -07:00
committed by GitHub
parent 9a30914e94
commit a2b3d9b90b

View File

@@ -802,7 +802,32 @@ class ServerArgs:
hf_config = self.get_hf_config()
model_arch = hf_config.architectures[0]
if model_arch in ["GptOssForCausalLM"]:
if model_arch in ["DeepseekV3ForCausalLM"]:
    # Apply Blackwell (sm100)-tuned defaults for DeepSeek V3 class models.
    # Each override only fires when the user has not chosen a value
    # explicitly, so CLI/config settings always win.
    if is_cuda() and is_sm100_supported():
        # Touch the attention backend only if none of the three
        # attention-backend knobs (unified / prefill / decode) was set.
        backend_unset = (
            self.attention_backend is None
            and self.prefill_attention_backend is None
            and self.decode_attention_backend is None
        )
        if backend_unset:
            self.attention_backend = "trtllm_mla"
            logger.info(
                "Use trtllm_mla as attention backend on sm100 for DeepseekV3ForCausalLM"
            )
        # AllReduce fusion conflicts with data-parallel attention,
        # so enable it only when DP attention is off.
        if not self.enable_dp_attention:
            self.enable_flashinfer_allreduce_fusion = True
            logger.info(
                "Enable FlashInfer AllReduce Fusion on sm100 for DeepseekV3ForCausalLM"
            )
        # For ModelOpt FP4 checkpoints, resolve the "auto" MoE runner
        # to the FlashInfer TRT-LLM backend.
        fp4_with_auto_moe = (
            self.quantization == "modelopt_fp4"
            and self.moe_runner_backend == "auto"
        )
        if fp4_with_auto_moe:
            self.moe_runner_backend = "flashinfer_trtllm"
            logger.info(
                "Use flashinfer_trtllm as moe runner backend on sm100 for DeepseekV3ForCausalLM"
            )
elif model_arch in ["GptOssForCausalLM"]:
if (
self.attention_backend is None
and self.prefill_attention_backend is None