fix: fix video input for qwen3-vl (#11442)

This commit is contained in:
Mick
2025-10-14 00:30:43 +08:00
committed by GitHub
parent 54a46a264d
commit f35f120d70
5 changed files with 51 additions and 7 deletions

View File

@@ -1142,6 +1142,13 @@ class MRotaryEmbedding(RotaryEmbedding):
second_per_grid_ts: Optional[torch.Tensor] = None,
**kwargs,
) -> Tuple[torch.Tensor, torch.Tensor]:
if (
model_type.startswith("qwen3_vl") or model_type.startswith("qwen3_vl_moe")
) and video_grid_thw is not None:
video_grid_thw = torch.repeat_interleave(
video_grid_thw, video_grid_thw[:, 0], dim=0
)
video_grid_thw[:, 0] = 1
mrope_position_deltas = []
if input_ids is not None and (
image_grid_thw is not None or video_grid_thw is not None

View File

@@ -25,7 +25,6 @@ import signal
import sys
import threading
import time
import uuid
from collections import deque
from contextlib import nullcontext
from datetime import datetime
@@ -360,7 +359,8 @@ class TokenizerManager(TokenizerCommunicatorMixin):
(
FreezeGCReq,
lambda x: None,
), # For handling case when scheduler skips detokenizer and forwards back to the tokenizer manager, we ignore it.
),
# For handling case when scheduler skips detokenizer and forwards back to the tokenizer manager, we ignore it.
(HealthCheckOutput, lambda x: None),
]
)
@@ -587,9 +587,9 @@ class TokenizerManager(TokenizerCommunicatorMixin):
)
if self.mm_processor and obj.contains_mm_input():
if not isinstance(obj.image_data, list):
if not isinstance(obj.image_data, list) and obj.image_data:
obj.image_data = [obj.image_data]
if not isinstance(obj.audio_data, list):
if not isinstance(obj.audio_data, list) and obj.audio_data:
obj.audio_data = [obj.audio_data]
mm_inputs: Dict = await self.mm_processor.process_mm_data_async(
image_data=obj.image_data,

View File

@@ -196,7 +196,6 @@ MAMBA_CACHE_SIZE_MAX_RUNNING_REQUESTS_RATIO = 3
logger = logging.getLogger(__name__)
if _is_npu:
import torch_npu
@@ -636,6 +635,22 @@ class ModelRunner:
"Setting hicache_io_backend to vanilla I/O, which may lead to suboptimal performance with small page sizes."
)
if self.model_config.hf_config.model_type == "qwen3_vl_moe":
if (
quantization_config := getattr(
self.model_config.hf_config, "quantization_config", None
)
) is not None:
text_config = self.model_config.hf_text_config
weight_block_size_n = quantization_config["weight_block_size"][0]
if (
text_config.moe_intermediate_size
// (self.tp_size // self.moe_ep_size)
) % weight_block_size_n != 0:
raise ValueError(
f"For qwen3-vl-fp8 models, please make sure ({text_config.moe_intermediate_size=} // ({self.tp_size=} // {self.moe_ep_size=})) % {weight_block_size_n=} == 0"
)
def init_torch_distributed(self):
logger.info("Init torch distributed begin.")