Add Llama4 support (#5092)

Co-authored-by: Cheng Wan <cwan39@gatech.edu>
Co-authored-by: fzyzcjy <ch271828n@outlook.com>
Co-authored-by: ispobock <ispobaoke@163.com>
2025-04-07 00:29:36 -07:00
parent d1bb171180
commit f04c80dc42
27 changed files with 2214 additions and 22 deletions
--- a/python/sglang/srt/configs/model_config.py
+++ b/python/sglang/srt/configs/model_config.py
@@ -65,6 +65,9 @@ class ModelConfig:
            **kwargs,
        )
        self.hf_text_config = get_hf_text_config(self.hf_config)
+        self.attention_chunk_size = getattr(
+            self.hf_text_config, "attention_chunk_size", None
+        )

        # Check model type
        self.is_generation = is_generation_model(
@@ -467,6 +470,7 @@ multimodal_model_archs = [
    "Gemma3ForConditionalGeneration",
    "Grok1VForCausalLM",
    "Grok1AForCausalLM",
+    # TODO: add multimodal support for "Llama4ForConditionalGeneration",
    "LlavaLlamaForCausalLM",
    "LlavaMistralForCausalLM",
    "LlavaQwenForCausalLM",