From 621aa7d27096cb34ae7437914bddf74f4dc472de Mon Sep 17 00:00:00 2001 From: Ronald Date: Fri, 26 Sep 2025 08:51:54 +0800 Subject: [PATCH] fix error async_scheduler can't be enabled (#3127) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What this PR does / why we need it? PR #2894 makes `ascend_scheduler_config.enabled` always be `True` for non-MLA models. When `ascend_scheduler_config.enabled=True`, it will always initialize `AscendScheduler`, which is a subclass of `Scheduler`, but when we enable async_scheduling, we need to initialize `AsyncScheduler` in vllm, so async_scheduling can't be enabled. ### Does this PR introduce _any_ user-facing change? Not related ### How was this patch tested? When the user sets `async_scheduling`, it means the user doesn't want to use `AscendScheduler`, so we shouldn't set `ascend_scheduler_config.enabled = True` - vLLM version: v0.10.2 - vLLM main: https://github.com/vllm-project/vllm/commit/f225ea7dd98e9f29752e5c032cd4a8ee1d712f16 Signed-off-by: Ronald1995 --- vllm_ascend/platform.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index f25f984..dbfe1dc 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -135,7 +135,8 @@ class NPUPlatform(Platform): else: structured_outputs_config = vllm_config.structured_outputs_config - if model_config is not None and not model_config.use_mla: + if (model_config is not None and not model_config.use_mla + and not scheduler_config.async_scheduling): logger.info( "Non-MLA LLMs forcibly disable the chunked prefill feature," "as the performance of operators supporting this feature "