Revert "ROCm: Flex Attention Enablement with custom backends (#4178)" (#4186)

2025-03-07 10:27:52 -08:00
parent 0beea4503f
commit eb61f5c9af
7 changed files with 35 additions and 1434 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -710,23 +710,13 @@ class ServerArgs:
        )

        # Kernel backend
-        if is_hip():
-            parser.add_argument(
-                "--attention-backend",
-                type=str,
-                choices=["triton", "torch_native", "aiter", "aiter_decode"],
-                default=ServerArgs.attention_backend,
-                help="Choose the kernels for attention layers.",
-            )
-        else:
-            parser.add_argument(
-                "--attention-backend",
-                type=str,
-                choices=["flashinfer", "triton", "torch_native"],
-                default=ServerArgs.attention_backend,
-                help="Choose the kernels for attention layers.",
-            )
-
+        parser.add_argument(
+            "--attention-backend",
+            type=str,
+            choices=["flashinfer", "triton", "torch_native"],
+            default=ServerArgs.attention_backend,
+            help="Choose the kernels for attention layers.",
+        )
        parser.add_argument(
            "--sampling-backend",
            type=str,