[Core] Disable the chunked prefill feature in Non-MLA LLMs (#2894)
### What this PR does / why we need it?
This PR enforces the forcible disabling of the chunked prefill feature
in Non-MLA models, as the performance of operators supporting this
functionality is currently suboptimal. Unless the user has enabled
chunked prefill in the ascend_scheduler_config, we would allow this
feature.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
CI passed with new added/existing test.
Related: https://github.com/vllm-project/vllm-ascend/pull/2659
- vLLM version: main
- vLLM main:
d21a36f5f9
Signed-off-by: rjg-lyh <1318825571@qq.com>
This commit is contained in:
@@ -25,7 +25,6 @@ from vllm.config import SchedulerConfig
|
||||
class AscendSchedulerConfig(SchedulerConfig):
|
||||
enable_chunked_prefill: bool = False
|
||||
policy: str = "fcfs"
|
||||
num_scheduler_steps: int = 1
|
||||
scheduler_cls: Union[str, Type[object]] = (
|
||||
"vllm_ascend.core.scheduler.AscendScheduler")
|
||||
enable_pd_transfer: bool = False
|
||||
@@ -44,7 +43,6 @@ class AscendSchedulerConfig(SchedulerConfig):
|
||||
# Override default values into original SchedulerConfig
|
||||
scheduler_config["enable_chunked_prefill"] = False
|
||||
scheduler_config["policy"] = "fcfs"
|
||||
scheduler_config["num_scheduler_steps"] = 1
|
||||
scheduler_config["scheduler_cls"] = (
|
||||
"vllm_ascend.core.scheduler.AscendScheduler")
|
||||
scheduler_config["enable_pd_transfer"] = False
|
||||
@@ -76,9 +74,6 @@ class AscendSchedulerConfig(SchedulerConfig):
|
||||
if self.is_multimodal_model:
|
||||
raise NotImplementedError(
|
||||
"currently AscendScheduler only supports LLM models.")
|
||||
if self.num_scheduler_steps > 1:
|
||||
raise NotImplementedError(
|
||||
"currently AscendScheduler doesn't support multi-step.")
|
||||
if self.send_delta_data:
|
||||
raise NotImplementedError(
|
||||
"currently AscendScheduler doesn't support send_delta_data.")
|
||||
|
||||
Reference in New Issue
Block a user