Clean up allocators (#9134)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Lianmin Zheng
2025-08-13 13:56:04 -07:00
committed by GitHub
parent 2f20f43026
commit 9e426466af
16 changed files with 288 additions and 295 deletions

View File

@@ -575,6 +575,7 @@ class ServerArgs:
"Pipeline parallelism is incompatible with overlap schedule."
)
# Hicache
if self.hicache_storage_backend == "mooncake":
# to use mooncake storage backend, the following conditions must be met:
self.hicache_io_backend = "kernel"
@@ -1316,19 +1317,23 @@ class ServerArgs:
# Kernel backend
# Names of the supported attention backends, grouped by platform.
# Each backend is listed exactly once, under its platform group.
# NOTE(review): presumably passed as the accepted values of the
# `--attention-backend` argument defined right after this list -- confirm.
ATTN_BACKENDS = [
    # Common
    "triton",
    "torch_native",
    # NVIDIA specific
    "cutlass_mla",
    "fa3",
    "flashinfer",
    "flashmla",
    "trtllm_mla",
    "trtllm_mha",
    "dual_chunk_flash_attn",
    # AMD specific
    "aiter",
    "wave",
    # Other platforms
    "intel_amx",
    "ascend",
]
parser.add_argument(
"--attention-backend",