Support multi-step decode (#299)

Add multi-step scheduler support for vllm-ascend

Signed-off-by: new-TonyWang <wangtonyyu222@gmail.com>
This commit is contained in:
Tony
2025-03-11 19:20:06 +08:00
committed by GitHub
parent feb6bdb12e
commit 4c9d78a035
5 changed files with 1067 additions and 10 deletions

View File

@@ -105,7 +105,11 @@ class NPUPlatform(Platform):
def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
parallel_config = vllm_config.parallel_config
if parallel_config.worker_cls == "auto":
parallel_config.worker_cls = "vllm_ascend.worker.worker.NPUWorker"
if vllm_config.scheduler_config.is_multi_step:
parallel_config.worker_cls = "vllm_ascend.worker.multi_step_worker.MultiStepWorker"
else:
parallel_config.worker_cls = "vllm_ascend.worker.worker.NPUWorker"
cache_config = vllm_config.cache_config
if cache_config and cache_config.block_size is None:
cache_config.block_size = 128