Support FA3 as Attention backend by using --attention-backend fa3 (#4680)

Co-authored-by: qsong <qsong@linkedin.com>
Co-authored-by: qingquansong <ustcsqq@gmail.com>
This commit is contained in:
Stefan He
2025-03-23 23:28:11 -07:00
committed by GitHub
parent af6535e7aa
commit 5d7edc8e55
5 changed files with 622 additions and 1 deletions

View File

@@ -770,7 +770,7 @@ class ServerArgs:
parser.add_argument(
"--attention-backend",
type=str,
-choices=["flashinfer", "triton", "torch_native"],
+choices=["flashinfer", "triton", "torch_native", "fa3"],
default=ServerArgs.attention_backend,
help="Choose the kernels for attention layers.",
)