[Fix] Fix wrong backend chosen in hybrid backend (#8989)
This commit is contained in:
@@ -378,6 +378,12 @@ class ModelRunner:
|
||||
)
|
||||
server_args.attention_backend = "torch_native"
|
||||
|
||||
if server_args.prefill_attention_backend is not None and (
|
||||
server_args.prefill_attention_backend
|
||||
== server_args.decode_attention_backend
|
||||
): # override the default attention backend
|
||||
server_args.attention_backend = server_args.prefill_attention_backend
|
||||
|
||||
if server_args.attention_backend is None:
|
||||
"""
|
||||
Auto select the fastest attention backend.
|
||||
|
||||
Reference in New Issue
Block a user