From fc42ff7b6377f4053187a6e172dd675fb442fc97 Mon Sep 17 00:00:00 2001
From: DarkSharpness <76582120+DarkSharpness@users.noreply.github.com>
Date: Fri, 8 Aug 2025 21:21:17 -0700
Subject: [PATCH] [Fix] Fix wrong backend chosen in hybrid backend (#8989)

---
 python/sglang/srt/model_executor/model_runner.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
index 923482d72..317734578 100644
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -378,6 +378,12 @@ class ModelRunner:
             )
             server_args.attention_backend = "torch_native"
 
+        if server_args.prefill_attention_backend is not None and (
+            server_args.prefill_attention_backend
+            == server_args.decode_attention_backend
+        ):  # same prefill/decode backend: use it as attention_backend
+            server_args.attention_backend = server_args.prefill_attention_backend
+
         if server_args.attention_backend is None:
             """
             Auto select the fastest attention backend.