diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index f25f984..dbfe1dc 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -135,7 +135,8 @@ class NPUPlatform(Platform): else: structured_outputs_config = vllm_config.structured_outputs_config - if model_config is not None and not model_config.use_mla: + if (model_config is not None and not model_config.use_mla + and not scheduler_config.async_scheduling): logger.info( "Non-MLA LLMs forcibly disable the chunked prefill feature," "as the performance of operators supporting this feature "