[ModelRunner] Add hunyuan-vl basic support (#5151)

### What this PR does / why we need it?
This patch adds handling of `XDRotaryEmbedding` in the model runner to add support
for `hunyuan-vl`.
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?
CI passed with added/existing tests.

Closes: https://github.com/vllm-project/vllm-ascend/issues/4992

- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
Li Wang
2025-12-23 10:46:54 +08:00
committed by GitHub
parent c9b5881bcd
commit 9a79cbaecb
3 changed files with 63 additions and 25 deletions

View File

@@ -27,28 +27,32 @@ from vllm.assets.image import ImageAsset
from tests.e2e.conftest import VllmRunner
def test_multimodal_vl(prompt_template):
image = ImageAsset("cherry_blossom") \
.pil_image.convert("RGB")
def test_multimodal_vl(vl_config):
image = ImageAsset("cherry_blossom").pil_image.convert("RGB")
img_questions = [
"What is the content of this image?",
"Describe the content of this image in detail.",
"What's in the image?",
"Where is this image taken?",
]
images = [image] * len(img_questions)
prompts = prompt_template(img_questions)
with VllmRunner("Qwen/Qwen3-VL-8B-Instruct",
mm_processor_kwargs={
"min_pixels": 28 * 28,
"max_pixels": 1280 * 28 * 28,
"fps": 1,
},
enforce_eager=False) as vllm_model:
outputs = vllm_model.generate_greedy(prompts=prompts,
images=images,
max_tokens=64)
prompts = vl_config["prompt_fn"](img_questions)
with VllmRunner(vl_config["model"],
mm_processor_kwargs=vl_config["mm_processor_kwargs"],
enforce_eager=False,
max_model_len=8192,
limit_mm_per_prompt={"image": 1}) as vllm_model:
outputs = vllm_model.generate_greedy(
prompts=prompts,
images=images,
max_tokens=64,
)
assert len(outputs) == len(prompts)
for _, output_str in outputs:
assert output_str, "Generated output should not be empty."