drop ascend scheduler (#4498)

The Ascend scheduler was originally added for the non-chunked-prefill case, because the NPU ops did not work well with chunked prefill. Now that the ops work better with chunked prefill, it is time to remove the Ascend scheduler and use the vLLM default scheduler instead. - vLLM version: v0.11.2 --------- Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-11-29 16:18:34 +08:00
parent 53a52d6614
commit f10acddb78
52 changed files with 85 additions and 2948 deletions
--- a/tests/e2e/singlecard/test_vlm.py
+++ b/tests/e2e/singlecard/test_vlm.py
@@ -20,7 +20,6 @@

 Run `pytest tests/test_offline_inference.py`.
 """
-import pytest
 from vllm import SamplingParams
 from vllm.assets.audio import AudioAsset
 from vllm.assets.image import ImageAsset
@@ -55,40 +54,6 @@ def test_multimodal_vl(prompt_template):
            assert output_str, "Generated output should not be empty."


-@pytest.mark.skip(reason="This e2e test will stuck in multi-batch scenario. "
-                  "Add this back after fixing the issue.")
-def test_multimodal_ascend_scheduler(prompt_template):
-    image = ImageAsset("cherry_blossom") \
-        .pil_image.convert("RGB")
-    img_questions = [
-        "What is the content of this image?",
-        "Describe the content of this image in detail.",
-        "What's in the image?",
-        "Where is this image taken?",
-    ]
-    images = [image] * len(img_questions)
-    prompts = prompt_template(img_questions)
-    with VllmRunner("Qwen/Qwen2.5-VL-3B-Instruct",
-                    max_model_len=4096,
-                    additional_config={
-                        'ascend_scheduler_config': {
-                            'enabled': True,
-                        },
-                    },
-                    mm_processor_kwargs={
-                        "min_pixels": 28 * 28,
-                        "max_pixels": 1280 * 28 * 28,
-                        "fps": 1,
-                    },
-                    enforce_eager=True) as vllm_model:
-        outputs = vllm_model.generate_greedy(prompts=prompts,
-                                             images=images,
-                                             max_tokens=64)
-        assert len(outputs) == len(prompts)
-        for _, output_str in outputs:
-            assert output_str, "Generated output should not be empty."
-
-
 def test_multimodal_audio():
    audio_prompt = "".join([
        f"Audio {idx+1}: <|audio_bos|><|AUDIO|><|audio_eos|>\n"