Support FA3 as Attention backend by using --attention-backend fa3 (#4680)
Co-authored-by: qsong <qsong@linkedin.com>
Co-authored-by: qingquansong <ustcsqq@gmail.com>
This commit is contained in:
@@ -770,7 +770,7 @@ class ServerArgs:
         parser.add_argument(
             "--attention-backend",
             type=str,
-            choices=["flashinfer", "triton", "torch_native"],
+            choices=["flashinfer", "triton", "torch_native", "fa3"],
             default=ServerArgs.attention_backend,
             help="Choose the kernels for attention layers.",
         )
Reference in New Issue
Block a user