SWA Prefix Cache (#7367)

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
2025-07-13 12:31:07 -07:00
parent 0c55cbcfc5
commit 9379da77de
16 changed files with 1742 additions and 158 deletions
--- a/python/sglang/srt/configs/model_config.py
+++ b/python/sglang/srt/configs/model_config.py
@@ -711,7 +711,6 @@ def get_hybrid_layer_ids(model_architectures: List[str], num_hidden_layers: int)
            i for i in range(num_hidden_layers) if (i + 1) % 4 == 0
        ]
    else:
-        raise ValueError(
-            "get_hybrid_layer_ids is only implemented for Llama4ForConditionalGeneration"
-        )
+        swa_attention_layer_ids = None
+        full_attention_layer_ids = None
    return swa_attention_layer_ids, full_attention_layer_ids