[Core] Disable the chunked prefill feature in Non-MLA LLMs (#2894)

### What this PR does / why we need it? This PR enforces the forcible disabling of the chunked prefill feature in Non-MLA models, as the performance of operators supporting this functionality is currently suboptimal. Unless the user has enabled chunked prefill in the ascend_scheduler_config, we would allow this feature. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? CI passed with new added/existing test. Related: https://github.com/vllm-project/vllm-ascend/pull/2659 - vLLM version: main - vLLM main: d21a36f5f9 Signed-off-by: rjg-lyh <1318825571@qq.com>
2025-09-12 23:17:09 +08:00
parent 756b8a1946
commit 585a494baa
3 changed files with 29 additions and 23 deletions
--- a/vllm_ascend/core/schedule_config.py
+++ b/vllm_ascend/core/schedule_config.py
@@ -25,7 +25,6 @@ from vllm.config import SchedulerConfig
 class AscendSchedulerConfig(SchedulerConfig):
    enable_chunked_prefill: bool = False
    policy: str = "fcfs"
-    num_scheduler_steps: int = 1
    scheduler_cls: Union[str, Type[object]] = (
        "vllm_ascend.core.scheduler.AscendScheduler")
    enable_pd_transfer: bool = False
@@ -44,7 +43,6 @@ class AscendSchedulerConfig(SchedulerConfig):
        # Override default values into original SchedulerConfig
        scheduler_config["enable_chunked_prefill"] = False
        scheduler_config["policy"] = "fcfs"
-        scheduler_config["num_scheduler_steps"] = 1
        scheduler_config["scheduler_cls"] = (
            "vllm_ascend.core.scheduler.AscendScheduler")
        scheduler_config["enable_pd_transfer"] = False
@@ -76,9 +74,6 @@ class AscendSchedulerConfig(SchedulerConfig):
        if self.is_multimodal_model:
            raise NotImplementedError(
                "currently AscendScheduler only supports LLM models.")
-        if self.num_scheduler_steps > 1:
-            raise NotImplementedError(
-                "currently AscendScheduler doesn't support multi-step.")
        if self.send_delta_data:
            raise NotImplementedError(
                "currently AscendScheduler doesn't support send_delta_data.")