[Fix] Fix wrong backend chosen in hybrid backend (#8989)

2025-08-08 21:21:17 -07:00
parent 7c0db868a1
commit fc42ff7b63
1 changed file with 6 additions and 0 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -378,6 +378,12 @@ class ModelRunner:
            )
            server_args.attention_backend = "torch_native"

+        if server_args.prefill_attention_backend is not None and (
+            server_args.prefill_attention_backend
+            == server_args.decode_attention_backend
+        ):  # same backend for prefill and decode: use it as attention_backend
+            server_args.attention_backend = server_args.prefill_attention_backend
+
        if server_args.attention_backend is None:
            """
            Auto select the fastest attention backend.