Add speculator attention backend switch (#9981)

This commit is contained in:
cicirori
2025-09-08 06:44:36 +02:00
committed by GitHub
parent 3b99f23c44
commit 8c5930f08a
6 changed files with 130 additions and 54 deletions

View File

@@ -98,6 +98,7 @@ GLOBAL_SERVER_ARGS_KEYS = [
"sampling_backend",
"speculative_accept_threshold_single",
"speculative_accept_threshold_acc",
"speculative_attention_backend",
"torchao_config",
"triton_attention_reduce_in_fp32",
"num_reserved_decode_tokens",