Turn off hybrid cache by default for GptOssForCausalLM (#8839)

This commit is contained in:
Ke Bao
2025-08-06 09:53:45 +08:00
committed by GitHub
parent 5d62b56f7e
commit 8128e08d36

View File

@@ -461,6 +461,7 @@ class ServerArgs:
if model_arch in ["GptOssForCausalLM"]:
self.attention_backend = "triton"
self.enable_triton_kernel_moe = True
self.disable_hybrid_swa_memory = True
# Set page size
if self.page_size is None: