From 3f4a358b140226e5c6d218742ff10a210cda5800 Mon Sep 17 00:00:00 2001
From: Pleaplusone <pleaplusone.gy@gmail.com>
Date: Tue, 19 Aug 2025 09:09:43 +0800
Subject: [PATCH] [Bugfix] Fix custom op register issue (#2409)

### What this PR does / why we need it?
Our current code registers the custom ops during the platform
initialization phase. However, when a new process is started to create a
worker, the earlier patch loses its effect on the custom ops, which leads
to a fallback to the native pass written in vLLM. This PR moves the patch
code into the worker to make sure the custom op patch works as
expected.

### Does this PR introduce _any_ user-facing change?
No

- vLLM version: v0.10.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/8ea0c2753a273e24957ab4587c200a3254ebe970

Signed-off-by: ganyi <pleaplusone.gy@gmail.com>
---
 vllm_ascend/platform.py         | 5 +----
 vllm_ascend/worker/worker_v1.py | 5 +++--
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py
index 5be5a05..7f21f26 100644
--- a/vllm_ascend/platform.py
+++ b/vllm_ascend/platform.py
@@ -29,7 +29,7 @@ from vllm.platforms import Platform, PlatformEnum
 from vllm_ascend.ascend_config import (check_ascend_config, get_ascend_config,
                                        init_ascend_config)
 from vllm_ascend.utils import (ASCEND_QUATIZATION_METHOD, is_310p,
-                               register_ascend_customop, update_aclgraph_sizes)
+                               update_aclgraph_sizes)
 
 if TYPE_CHECKING:
     from vllm.config import ModelConfig, VllmConfig
@@ -201,9 +201,6 @@ class NPUPlatform(Platform):
                     "For better performance in Qwen3 MoE, SP only works exclusively with MC2, AllToAll, and AllToAllV."
                 )
 
-        # register Ascend CustomOp
-        register_ascend_customop()
-
     @classmethod
     def get_attn_backend_cls(cls,
                              selected_backend,
diff --git a/vllm_ascend/worker/worker_v1.py b/vllm_ascend/worker/worker_v1.py
index 19ef2ef..4e75a7d 100644
--- a/vllm_ascend/worker/worker_v1.py
+++ b/vllm_ascend/worker/worker_v1.py
@@ -45,7 +45,8 @@ from vllm_ascend.ascend_config import init_ascend_config
 from vllm_ascend.device_allocator.camem import CaMemAllocator
 from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
 from vllm_ascend.platform import NPUPlatform
-from vllm_ascend.utils import (init_ascend_soc_version, sleep_mode_enabled,
+from vllm_ascend.utils import (init_ascend_soc_version,
+                               register_ascend_customop, sleep_mode_enabled,
                                try_register_lib)
 from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
 
@@ -69,7 +70,7 @@ class NPUWorker(WorkerBase):
         from vllm_ascend import ops
         ops.register_dummy_fusion_op()
         _register_atb_extensions()
-
+        register_ascend_customop()
         # init ascend config and soc version
         init_ascend_config(vllm_config)
         init_ascend_soc_version()