Revert "ROCm: Flex Attention Enablement with custom backends (#4178)" (#4186)

This commit is contained in:
Yineng Zhang
2025-03-07 10:27:52 -08:00
committed by GitHub
parent 0beea4503f
commit eb61f5c9af
7 changed files with 35 additions and 1434 deletions

View File

@@ -710,23 +710,13 @@ class ServerArgs:
         )
         # Kernel backend
-        if is_hip():
-            parser.add_argument(
-                "--attention-backend",
-                type=str,
-                choices=["triton", "torch_native", "aiter", "aiter_decode"],
-                default=ServerArgs.attention_backend,
-                help="Choose the kernels for attention layers.",
-            )
-        else:
-            parser.add_argument(
-                "--attention-backend",
-                type=str,
-                choices=["flashinfer", "triton", "torch_native"],
-                default=ServerArgs.attention_backend,
-                help="Choose the kernels for attention layers.",
-            )
+        parser.add_argument(
+            "--attention-backend",
+            type=str,
+            choices=["flashinfer", "triton", "torch_native"],
+            default=ServerArgs.attention_backend,
+            help="Choose the kernels for attention layers.",
+        )
         parser.add_argument(
             "--sampling-backend",
             type=str,