From e33751ef8bd42655ffa564332657cd1fa117c1da Mon Sep 17 00:00:00 2001 From: whx <56632993+whx-sjtu@users.noreply.github.com> Date: Sat, 25 Oct 2025 09:41:33 +0800 Subject: [PATCH] [BugFix][Core] Fix a bug running multi-modal with ascend_scheduler (#3675) This PR fixes a bug related to running multi-modal models with AscendScheduler. The bug was introduced by PR #2372, which used the same parameter names as vLLM but with different default values. This PR fixes the bug by changing the default values of these two parameters to align with vLLM. - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/17c540a993af88204ad1b78345c8a865cf58ce44 Signed-off-by: hw_whx Co-authored-by: hw_whx --- tests/e2e/singlecard/test_vlm.py | 32 +++++++++++++++++++++++++++++ vllm_ascend/core/schedule_config.py | 8 ++++---- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/tests/e2e/singlecard/test_vlm.py b/tests/e2e/singlecard/test_vlm.py index 654078e7..8808d1e0 100644 --- a/tests/e2e/singlecard/test_vlm.py +++ b/tests/e2e/singlecard/test_vlm.py @@ -55,6 +55,38 @@ def test_multimodal_vl(prompt_template): assert output_str, "Generated output should not be empty." 
+def test_multimodal_ascend_scheduler(prompt_template): + image = ImageAsset("cherry_blossom") \ + .pil_image.convert("RGB") + img_questions = [ + "What is the content of this image?", + "Describe the content of this image in detail.", + "What's in the image?", + "Where is this image taken?", + ] + images = [image] * len(img_questions) + prompts = prompt_template(img_questions) + with VllmRunner("Qwen/Qwen2.5-VL-3B-Instruct", + max_model_len=4096, + additional_config={ + 'ascend_scheduler_config': { + 'enabled': True, + }, + }, + mm_processor_kwargs={ + "min_pixels": 28 * 28, + "max_pixels": 1280 * 28 * 28, + "fps": 1, + }, + enforce_eager=True) as vllm_model: + outputs = vllm_model.generate_greedy(prompts=prompts, + images=images, + max_tokens=64) + assert len(outputs) == len(prompts) + for _, output_str in outputs: + assert output_str, "Generated output should not be empty." + + def test_multimodal_audio(): audio_prompt = "".join([ f"Audio {idx+1}: <|audio_bos|><|AUDIO|><|audio_eos|>\n" diff --git a/vllm_ascend/core/schedule_config.py b/vllm_ascend/core/schedule_config.py index 83e3eed4..a20c97c5 100644 --- a/vllm_ascend/core/schedule_config.py +++ b/vllm_ascend/core/schedule_config.py @@ -26,7 +26,7 @@ MAX_INT = 2147483647 @dataclass class AscendSchedulerConfig(SchedulerConfig): enable_chunked_prefill: bool = False - max_long_partial_prefills: int = MAX_INT + max_long_partial_prefills: int = 1 long_prefill_token_threshold: int = MAX_INT policy: str = "fcfs" scheduler_cls: Union[str, Type[object]] = ( @@ -73,9 +73,9 @@ class AscendSchedulerConfig(SchedulerConfig): "max_num_batched_tokens and makes vLLM reject longer " "sequences. Please increase max_num_batched_tokens or " "decrease max_model_len.") - # concurrent partial prefills. Default is inf + # concurrent partial prefills. Default is 1 meaning not enabled. 
if self.max_long_partial_prefills is None: - self.max_long_partial_prefills = MAX_INT + self.max_long_partial_prefills = 1 self.long_prefill_token_threshold = MAX_INT if self.long_prefill_token_threshold is None or \ @@ -105,4 +105,4 @@ class AscendSchedulerConfig(SchedulerConfig): if getattr(self, "scheduler_delay_factor", 0) > 0: raise NotImplementedError( "currently AscendScheduler doesn't support scheduler_delay_factor." - ) \ No newline at end of file + )