From 8128e08d3652cef9821aac50a1f2ce99bb282c40 Mon Sep 17 00:00:00 2001 From: Ke Bao Date: Wed, 6 Aug 2025 09:53:45 +0800 Subject: [PATCH] Turn off hybrid cache by default (#8839) --- python/sglang/srt/server_args.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 0d64571c1..10e8278a6 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -461,6 +461,7 @@ class ServerArgs: if model_arch in ["GptOssForCausalLM"]: self.attention_backend = "triton" self.enable_triton_kernel_moe = True + self.disable_hybrid_swa_memory = True # Set page size if self.page_size is None: