Turn off hybrid cache by default (#8839)
This commit is contained in:
@@ -461,6 +461,7 @@ class ServerArgs:
|
|||||||
if model_arch in ["GptOssForCausalLM"]:
|
if model_arch in ["GptOssForCausalLM"]:
|
||||||
self.attention_backend = "triton"
|
self.attention_backend = "triton"
|
||||||
self.enable_triton_kernel_moe = True
|
self.enable_triton_kernel_moe = True
|
||||||
|
self.disable_hybrid_swa_memory = True
|
||||||
|
|
||||||
# Set page size
|
# Set page size
|
||||||
if self.page_size is None:
|
if self.page_size is None:
|
||||||
|
|||||||
Reference in New Issue
Block a user