[Scheduler] Add AscendScheduler. (#543)

This PR adds AscendScheduler to the vLLM v1 engine.
The scheduler currently supports the v0-style prefill-first scheduling
strategy.
More scheduling methods will be supported by this scheduler in the future.

---------

Signed-off-by: hw_whx <wanghexiang7@huawei.com>
Co-authored-by: hw_whx <wanghexiang7@huawei.com>
This commit is contained in:
whx
2025-04-17 19:31:50 +08:00
committed by GitHub
parent 697908f5cd
commit 20dff4deff
9 changed files with 967 additions and 72 deletions

View File

@@ -40,6 +40,7 @@ from vllm.v1.utils import bind_kv_cache
from vllm.v1.worker.worker_base import WorkerBase
from vllm_ascend.platform import NPUPlatform
from vllm_ascend.utils import try_register_lib
from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
@@ -66,6 +67,11 @@ class NPUWorker(WorkerBase):
rank=rank,
distributed_init_method=distributed_init_method,
is_driver_worker=is_driver_worker)
# Try to import mindie_turbo to accelerate vLLM inference.
try_register_lib(
"mindie_turbo",
"MindIE Turbo is installed. vLLM inference will be accelerated with MindIE Turbo."
)
if self.cache_config.cache_dtype == "auto":
self.cache_dtype = self.model_config.dtype
else: