[Fix] Fix wrong backend chosen in hybrid backend (#8989)

This commit is contained in:
DarkSharpness
2025-08-08 21:21:17 -07:00
committed by GitHub
parent 7c0db868a1
commit fc42ff7b63

View File

@@ -378,6 +378,12 @@ class ModelRunner:
)
server_args.attention_backend = "torch_native"
if server_args.prefill_attention_backend is not None and (
server_args.prefill_attention_backend
== server_args.decode_attention_backend
): # override the default attention backend
server_args.attention_backend = server_args.prefill_attention_backend
if server_args.attention_backend is None:
"""
Auto select the fastest attention backend.