From fc42ff7b6377f4053187a6e172dd675fb442fc97 Mon Sep 17 00:00:00 2001 From: DarkSharpness <76582120+DarkSharpness@users.noreply.github.com> Date: Fri, 8 Aug 2025 21:21:17 -0700 Subject: [PATCH] [Fix] Fix wrong backend chosen in hybrid backend (#8989) --- python/sglang/srt/model_executor/model_runner.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index 923482d72..317734578 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -378,6 +378,12 @@ class ModelRunner: ) server_args.attention_backend = "torch_native" + if server_args.prefill_attention_backend is not None and ( + server_args.prefill_attention_backend + == server_args.decode_attention_backend + ): # override the default attention backend + server_args.attention_backend = server_args.prefill_attention_backend + if server_args.attention_backend is None: """ Auto select the fastest attention backend.