fix: fix video input for qwen3-vl (#11442)
This commit is contained in:
@@ -1142,6 +1142,13 @@ class MRotaryEmbedding(RotaryEmbedding):
second_per_grid_ts: Optional[torch.Tensor] = None,
**kwargs,
) -> Tuple[torch.Tensor, torch.Tensor]:
if (
model_type.startswith("qwen3_vl") or model_type.startswith("qwen3_vl_moe")
) and video_grid_thw is not None:
video_grid_thw = torch.repeat_interleave(
video_grid_thw, video_grid_thw[:, 0], dim=0
)
video_grid_thw[:, 0] = 1
mrope_position_deltas = []
if input_ids is not None and (
image_grid_thw is not None or video_grid_thw is not None
Reference in New Issue
Block a user