Simplify prepare_extend_after_decode (#6987)

This commit is contained in:
Lianmin Zheng
2025-06-09 16:39:21 -07:00
committed by GitHub
parent a968c888c0
commit dc0705a504
9 changed files with 140 additions and 176 deletions

View File

@@ -1013,13 +1013,13 @@ class ServerArgs:
 type=str,
 choices=[
 "aiter",
-"flashinfer",
-"triton",
-"torch_native",
-"fa3",
-"flashmla",
 "cutlass_mla",
+"fa3",
+"flashinfer",
+"flashmla",
 "intel_amx",
+"torch_native",
+"triton",
 ],
 default=ServerArgs.attention_backend,
 help="Choose the kernels for attention layers.",