Add speculator attention backend switch (#9981)
This commit is contained in:
@@ -98,6 +98,7 @@ GLOBAL_SERVER_ARGS_KEYS = [
|
||||
"sampling_backend",
|
||||
"speculative_accept_threshold_single",
|
||||
"speculative_accept_threshold_acc",
|
||||
"speculative_attention_backend",
|
||||
"torchao_config",
|
||||
"triton_attention_reduce_in_fp32",
|
||||
"num_reserved_decode_tokens",
|
||||
|
||||
Reference in New Issue
Block a user