Add Llama4 support (#5092)

Co-authored-by: Cheng Wan <cwan39@gatech.edu>
Co-authored-by: fzyzcjy <ch271828n@outlook.com>
Co-authored-by: ispobock <ispobaoke@163.com>
2025-04-07 00:29:36 -07:00
parent d1bb171180
commit f04c80dc42
27 changed files with 2214 additions and 22 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -128,6 +128,7 @@ class ModelRunner:
            self.model_config.attention_arch == AttentionArch.MLA
            and not server_args.disable_mla
        )
+        self.attention_chunk_size = model_config.attention_chunk_size

        # Model-specific adjustment
        self.model_specific_adjustment()