From e33751ef8bd42655ffa564332657cd1fa117c1da Mon Sep 17 00:00:00 2001 From: whx <56632993+whx-sjtu@users.noreply.github.com> Date: Sat, 25 Oct 2025 09:41:33 +0800 Subject: [PATCH] [BugFix][Core] Fix a bug running multi-modal with ascend_scheduler (#3675) This PR fixes a bug related to running multi-modal models with AscendScheduler. The bug was introduced by PR #2372, which used the same parameter names as vLLM but with different default values. This PR fixes the bug by changing the default values of these two parameters to align with vLLM. - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/17c540a993af88204ad1b78345c8a865cf58ce44 Signed-off-by: hw_whx Co-authored-by: hw_whx --- tests/e2e/singlecard/test_vlm.py | 32 +++++++++++++++++++++++++++++ vllm_ascend/core/schedule_config.py | 8 ++++---- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/tests/e2e/singlecard/test_vlm.py b/tests/e2e/singlecard/test_vlm.py index 654078e7..8808d1e0 100644 --- a/tests/e2e/singlecard/test_vlm.py +++ b/tests/e2e/singlecard/test_vlm.py @@ -55,6 +55,38 @@ def test_multimodal_vl(prompt_template): assert output_str, "Generated output should not be empty." 
+def test_multimodal_ascend_scheduler(prompt_template): + image = ImageAsset("cherry_blossom") \ + .pil_image.convert("RGB") + img_questions = [ + "What is the content of this image?", + "Describe the content of this image in detail.", + "What's in the image?", + "Where is this image taken?", + ] + images = [image] * len(img_questions) + prompts = prompt_template(img_questions) + with VllmRunner("Qwen/Qwen2.5-VL-3B-Instruct", + max_model_len=4096, + additional_config={ + 'ascend_scheduler_config': { + 'enabled': True, + }, + }, + mm_processor_kwargs={ + "min_pixels": 28 * 28, + "max_pixels": 1280 * 28 * 28, + "fps": 1, + }, + enforce_eager=True) as vllm_model: + outputs = vllm_model.generate_greedy(prompts=prompts, + images=images, + max_tokens=64) + assert len(outputs) == len(prompts) + for _, output_str in outputs: + assert output_str, "Generated output should not be empty." + + def test_multimodal_audio(): audio_prompt = "".join([ f"Audio {idx+1}: <|audio_bos|><|AUDIO|><|audio_eos|>\n" diff --git a/vllm_ascend/core/schedule_config.py b/vllm_ascend/core/schedule_config.py index 83e3eed4..a20c97c5 100644 --- a/vllm_ascend/core/schedule_config.py +++ b/vllm_ascend/core/schedule_config.py @@ -26,7 +26,7 @@ MAX_INT = 2147483647 @dataclass class AscendSchedulerConfig(SchedulerConfig): enable_chunked_prefill: bool = False - max_long_partial_prefills: int = MAX_INT + max_long_partial_prefills: int = 1 long_prefill_token_threshold: int = MAX_INT policy: str = "fcfs" scheduler_cls: Union[str, Type[object]] = ( @@ -73,9 +73,9 @@ class AscendSchedulerConfig(SchedulerConfig): "max_num_batched_tokens and makes vLLM reject longer " "sequences. Please increase max_num_batched_tokens or " "decrease max_model_len.") - # concurrent partial prefills. Default is inf + # concurrent partial prefills. Default is 1 meaning not enabled. 
if self.max_long_partial_prefills is None: - self.max_long_partial_prefills = MAX_INT + self.max_long_partial_prefills = 1 self.long_prefill_token_threshold = MAX_INT if self.long_prefill_token_threshold is None or \ @@ -105,4 +105,4 @@ class AscendSchedulerConfig(SchedulerConfig): if getattr(self, "scheduler_delay_factor", 0) > 0: raise NotImplementedError( "currently AscendScheduler doesn't support scheduler_delay_factor." - ) \ No newline at end of file + )