[Core] Disable the chunked prefill feature in Non-MLA LLMs (#2894)

### What this PR does / why we need it?
This PR enforces the forcible disabling of the chunked prefill feature
in Non-MLA models, as the performance of operators supporting this
functionality is currently suboptimal. Unless the user has enabled
chunked prefill in the ascend_scheduler_config, we would allow this
feature.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
CI passed with new added/existing test.

Related: https://github.com/vllm-project/vllm-ascend/pull/2659

- vLLM version: main
- vLLM main:
d21a36f5f9

Signed-off-by: rjg-lyh <1318825571@qq.com>
This commit is contained in:
rjg-lyh
2025-09-12 23:17:09 +08:00
committed by GitHub
parent 756b8a1946
commit 585a494baa
3 changed files with 29 additions and 23 deletions

View File

@@ -25,7 +25,6 @@ from vllm.config import SchedulerConfig
class AscendSchedulerConfig(SchedulerConfig):
enable_chunked_prefill: bool = False
policy: str = "fcfs"
num_scheduler_steps: int = 1
scheduler_cls: Union[str, Type[object]] = (
"vllm_ascend.core.scheduler.AscendScheduler")
enable_pd_transfer: bool = False
@@ -44,7 +43,6 @@ class AscendSchedulerConfig(SchedulerConfig):
# Override default values into original SchedulerConfig
scheduler_config["enable_chunked_prefill"] = False
scheduler_config["policy"] = "fcfs"
scheduler_config["num_scheduler_steps"] = 1
scheduler_config["scheduler_cls"] = (
"vllm_ascend.core.scheduler.AscendScheduler")
scheduler_config["enable_pd_transfer"] = False
@@ -76,9 +74,6 @@ class AscendSchedulerConfig(SchedulerConfig):
if self.is_multimodal_model:
raise NotImplementedError(
"currently AscendScheduler only supports LLM models.")
if self.num_scheduler_steps > 1:
raise NotImplementedError(
"currently AscendScheduler doesn't support multi-step.")
if self.send_delta_data:
raise NotImplementedError(
"currently AscendScheduler doesn't support send_delta_data.")