From 42fc44100a277ad68b3ceaed1984cd95334ff5fb Mon Sep 17 00:00:00 2001 From: Ying Sheng Date: Sat, 12 Jul 2025 20:13:40 -0700 Subject: [PATCH] [minor] Add server_args check for Llama4 with hybrid (#7988) --- python/sglang/srt/server_args.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index c44f53f7e..a59bf815d 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -1730,6 +1730,10 @@ class ServerArgs: else: self.lora_paths[lora_path] = lora_path + model_arch = get_model_arch(self) + if "Llama4" in model_arch and self.hybrid_kvcache_ratio is not None: + assert self.attention_backend == "fa3" + def prepare_server_args(argv: List[str]) -> ServerArgs: """