fix: fix video input for qwen3-vl (#11442)

This commit is contained in:
Mick
2025-10-14 00:30:43 +08:00
committed by GitHub
parent 54a46a264d
commit f35f120d70
5 changed files with 51 additions and 7 deletions

View File

@@ -1142,6 +1142,13 @@ class MRotaryEmbedding(RotaryEmbedding):
second_per_grid_ts: Optional[torch.Tensor] = None,
**kwargs,
) -> Tuple[torch.Tensor, torch.Tensor]:
if (
model_type.startswith("qwen3_vl") or model_type.startswith("qwen3_vl_moe")
) and video_grid_thw is not None:
video_grid_thw = torch.repeat_interleave(
video_grid_thw, video_grid_thw[:, 0], dim=0
)
video_grid_thw[:, 0] = 1
mrope_position_deltas = []
if input_ids is not None and (
image_grid_thw is not None or video_grid_thw is not None