Clean up allocators (#9134)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2025-08-13 13:56:04 -07:00
parent 2f20f43026
commit 9e426466af
16 changed files with 288 additions and 295 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -575,6 +575,7 @@ class ServerArgs:
                "Pipeline parallelism is incompatible with overlap schedule."
            )

+        # HiCache: storage backend constraints
        if self.hicache_storage_backend == "mooncake":
            # to use mooncake storage backend, the following conditions must be met:
            self.hicache_io_backend = "kernel"
@@ -1316,19 +1317,23 @@ class ServerArgs:

        # Kernel backend
        ATTN_BACKENDS = [
-            "aiter",
+            # Common
+            "triton",
+            "torch_native",
+            # NVIDIA specific
            "cutlass_mla",
            "fa3",
            "flashinfer",
            "flashmla",
-            "intel_amx",
-            "torch_native",
-            "ascend",
-            "triton",
            "trtllm_mla",
            "trtllm_mha",
            "dual_chunk_flash_attn",
+            # AMD specific
+            "aiter",
            "wave",
+            # Other platforms
+            "intel_amx",
+            "ascend",
        ]
        parser.add_argument(
            "--attention-backend",