From 3f4a358b140226e5c6d218742ff10a210cda5800 Mon Sep 17 00:00:00 2001 From: Pleaplusone Date: Tue, 19 Aug 2025 09:09:43 +0800 Subject: [PATCH] [Bugfix] Fix custom op register issue (#2409) ### What this PR does / why we need it? Our current code registers the custom ops inside the platform initialization phase. However, when a new process is started by creating a worker, the former patch loses its effect on the custom ops, which leads to a fallback to the native pass written in vllm. This PR moves the patch code to the worker to make sure the custom op patch works as expected. ### Does this PR introduce _any_ user-facing change? No - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/8ea0c2753a273e24957ab4587c200a3254ebe970 Signed-off-by: ganyi --- vllm_ascend/platform.py | 5 +---- vllm_ascend/worker/worker_v1.py | 5 +++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index 5be5a05..7f21f26 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -29,7 +29,7 @@ from vllm.platforms import Platform, PlatformEnum from vllm_ascend.ascend_config import (check_ascend_config, get_ascend_config, init_ascend_config) from vllm_ascend.utils import (ASCEND_QUATIZATION_METHOD, is_310p, - register_ascend_customop, update_aclgraph_sizes) + update_aclgraph_sizes) if TYPE_CHECKING: from vllm.config import ModelConfig, VllmConfig @@ -201,9 +201,6 @@ class NPUPlatform(Platform): "For better performance in Qwen3 MoE, SP only works exclusively with MC2, AllToAll, and AllToAllV." 
) - # register Ascend CustomOp - register_ascend_customop() - @classmethod def get_attn_backend_cls(cls, selected_backend, diff --git a/vllm_ascend/worker/worker_v1.py b/vllm_ascend/worker/worker_v1.py index 19ef2ef..4e75a7d 100644 --- a/vllm_ascend/worker/worker_v1.py +++ b/vllm_ascend/worker/worker_v1.py @@ -45,7 +45,8 @@ from vllm_ascend.ascend_config import init_ascend_config from vllm_ascend.device_allocator.camem import CaMemAllocator from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel from vllm_ascend.platform import NPUPlatform -from vllm_ascend.utils import (init_ascend_soc_version, sleep_mode_enabled, +from vllm_ascend.utils import (init_ascend_soc_version, + register_ascend_customop, sleep_mode_enabled, try_register_lib) from vllm_ascend.worker.model_runner_v1 import NPUModelRunner @@ -69,7 +70,7 @@ class NPUWorker(WorkerBase): from vllm_ascend import ops ops.register_dummy_fusion_op() _register_atb_extensions() - + register_ascend_customop() # init ascend config and soc version init_ascend_config(vllm_config) init_ascend_soc_version()