model: support qwen3-vl series (#10323)

Co-authored-by: ocss884 <ocss.lin@gmail.com> Co-authored-by: cao1zhg <653506626@qq.com> Co-authored-by: yhyang201 <yhyang201@gmail.com> Co-authored-by: yhyang201 <47235274+yhyang201@users.noreply.github.com> Co-authored-by: 瑀澈 <yuche.lz@alibaba-inc.com> Co-authored-by: Mick <mickjagger19@icloud.com> Co-authored-by: Yineng Zhang <me@zhyncs.com>
2025-09-24 01:15:52 +08:00
parent 98c3b04ff2
commit 4f564b9e83
7 changed files with 1898 additions and 8 deletions
--- a/python/sglang/srt/multimodal/processors/qwen_vl.py
+++ b/python/sglang/srt/multimodal/processors/qwen_vl.py
@@ -12,6 +12,8 @@ from torchvision.transforms import InterpolationMode
 from sglang.srt.layers.rotary_embedding import MRotaryEmbedding
 from sglang.srt.models.qwen2_5_vl import Qwen2_5_VLForConditionalGeneration
 from sglang.srt.models.qwen2_vl import Qwen2VLForConditionalGeneration
+from sglang.srt.models.qwen3_vl import Qwen3VLForConditionalGeneration
+from sglang.srt.models.qwen3_vl_moe import Qwen3VLMoeForConditionalGeneration
 from sglang.srt.multimodal.processors.base_processor import (
    BaseMultimodalProcessor as SGLangBaseProcessor,
 )
@@ -209,7 +211,12 @@ async def preprocess_video(

 # Compatible with Qwen2VL and Qwen2_5VL
 class Qwen2_5VLImageProcessor(SGLangBaseProcessor):
-    models = [Qwen2VLForConditionalGeneration, Qwen2_5_VLForConditionalGeneration]
+    models = [
+        Qwen2VLForConditionalGeneration,
+        Qwen2_5_VLForConditionalGeneration,
+        Qwen3VLForConditionalGeneration,
+        Qwen3VLMoeForConditionalGeneration,
+    ]

    def __init__(self, hf_config, server_args, _processor, *args, **kwargs):
        super().__init__(hf_config, server_args, _processor, *args, **kwargs)