diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py
index 65db170..45647b0 100644
--- a/vllm_ascend/platform.py
+++ b/vllm_ascend/platform.py
@@ -103,9 +103,6 @@ class NPUPlatform(Platform):

     @classmethod
     def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
-        # Register ops when setup.
-        from vllm_ascend import ops  # noqa: F401
-
         parallel_config = vllm_config.parallel_config
         if parallel_config.worker_cls == "auto":
             parallel_config.worker_cls = "vllm_ascend.worker.NPUWorker"
diff --git a/vllm_ascend/worker.py b/vllm_ascend/worker.py
index c5884e3..27930d4 100644
--- a/vllm_ascend/worker.py
+++ b/vllm_ascend/worker.py
@@ -68,6 +68,8 @@ class NPUWorker(LocalOrDistributedWorkerBase):
         is_driver_worker: bool = False,
         model_runner_cls: Optional[Type[ModelRunnerBase]] = None,
     ) -> None:
+        # Register ops when worker init.
+        from vllm_ascend import ops  # noqa: F401
         WorkerBase.__init__(self, vllm_config=vllm_config)

         # Try to import mindie_turbo to accelerate vLLM inference.