Support FA3 backend for gpt-oss (#9028)

2025-08-14 01:41:50 +08:00
parent 4a16a71c36
commit 0ff6d1fce1
4 changed files with 24 additions and 6 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -2106,10 +2106,10 @@ class ServerArgs:
        if model_arch in ["GptOssForCausalLM"]:
            if self.attention_backend is None:
                self.attention_backend = "triton"
-            assert self.attention_backend in [
-                "triton",
-                "trtllm_mha",
-            ], f"GptOssForCausalLM requires 'triton' or 'trtllm_mha' attention backend, but got {self.attention_backend}"
+            supported_backends = ["triton", "trtllm_mha", "fa3"]
+            assert (
+                self.attention_backend in supported_backends
+            ), f"GptOssForCausalLM requires one of {supported_backends} attention backend, but got '{self.attention_backend}'"
            quantization_config = getattr(hf_config, "quantization_config", None)
            is_mxfp4_quant_format = (
                quantization_config is not None