fix: fix video input for qwen3-vl (#11361)

2025-10-10 19:35:35 +08:00
parent 4299aebdbb
commit a1a20b4c7c
4 changed files with 45 additions and 3 deletions
--- a/python/sglang/srt/layers/rotary_embedding.py
+++ b/python/sglang/srt/layers/rotary_embedding.py
@@ -1126,6 +1126,11 @@ class MRotaryEmbedding(RotaryEmbedding):
        second_per_grid_ts: Optional[torch.Tensor] = None,
        **kwargs,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        if model_type.startswith("qwen3_vl") and video_grid_thw is not None:
+            video_grid_thw = torch.repeat_interleave(
+                video_grid_thw, video_grid_thw[:, 0], dim=0
+            )
+            video_grid_thw[:, 0] = 1
        mrope_position_deltas = []
        if input_ids is not None and (
            image_grid_thw is not None or video_grid_thw is not None