fix: fix video input for qwen3-vl (#11442)

2025-10-14 00:30:43 +08:00
parent 54a46a264d
commit f35f120d70
5 changed files with 51 additions and 7 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -196,7 +196,6 @@ MAMBA_CACHE_SIZE_MAX_RUNNING_REQUESTS_RATIO = 3

 logger = logging.getLogger(__name__)

-
 if _is_npu:
    import torch_npu

@@ -636,6 +635,22 @@ class ModelRunner:
                    "Setting hicache_io_backend to vanilla I/O, which may lead to suboptimal performance with small page sizes."
                )

+        if self.model_config.hf_config.model_type == "qwen3_vl_moe":
+            if (
+                quantization_config := getattr(
+                    self.model_config.hf_config, "quantization_config", None
+                )
+            ) is not None:
+                text_config = self.model_config.hf_text_config
+                weight_block_size_n = quantization_config["weight_block_size"][0]
+                if (
+                    text_config.moe_intermediate_size
+                    // (self.tp_size // self.moe_ep_size)
+                ) % weight_block_size_n != 0:
+                    raise ValueError(
+                        f"For qwen3-vl-fp8 models, please make sure ({text_config.moe_intermediate_size=} // ({self.tp_size=} // {self.moe_ep_size=})) % {weight_block_size_n=} == 0"
+                    )
+
    def init_torch_distributed(self):
        logger.info("Init torch distributed begin.")