[Feature] Support Deepseek-VL2 (#2798)

Co-authored-by: Edenzzzz <wtan45@wisc.edu>
Co-authored-by: Chayenne <zhaochen20@outlook.com>
Co-authored-by: Yi Zhang <1109276519@qq.com>
This commit is contained in:
萝卜菜
2025-03-17 14:07:59 +08:00
committed by GitHub
parent 0212d2e288
commit d6d21640d3
13 changed files with 1259 additions and 2 deletions

View File

@@ -266,6 +266,14 @@ class ModelRunner:
server_args.chunked_prefill_size = -1
server_args.disable_radix_cache = True
if self.model_config.hf_config.architectures == ["DeepseekVL2ForCausalLM"]:
# TODO: deepseek-vl2 does not support radix cache now, set disable_radix_cache=True automatically
logger.info(
"Automatically turn off --chunked-prefill-size and disable radix cache for deepseek-vl2."
)
server_args.chunked_prefill_size = -1
server_args.disable_radix_cache = True
def init_torch_distributed(self):
logger.info("Init torch distributed begin.")