Fix the device error when using ray as vllm-ascend backend (#884)

1. Remove RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES
2. Add lazy init for vllm_ascend_C

Signed-off-by: zhuo97 <1103045176@qq.com>
This commit is contained in:
zhuo97
2025-06-16 21:03:16 +08:00
committed by GitHub
parent 69b817ed65
commit f5404dc650
7 changed files with 40 additions and 20 deletions

View File

@@ -54,6 +54,8 @@ MAX_CAPTURE_SIZE = 1920
ASCEND_QUATIZATION_METHOD = "ascend"
CUSTOM_OP_ENABLED = None
def try_register_lib(lib_name: str, lib_info: str = ""):
import importlib
@@ -68,6 +70,31 @@ def try_register_lib(lib_name: str, lib_info: str = ""):
pass
def enable_custom_op():
    """
    Lazily import vllm_ascend_C so CANN's RTS component is not initialized
    at module-import time; this keeps ASCEND_RT_VISIBLE_DEVICES modifiable
    right up until torch.npu.set_device() is called.

    Returns True once the custom-op extension has been registered, False if
    the import failed (result is cached in the module-level flag, so the
    import is attempted at most once per process).
    """
    global CUSTOM_OP_ENABLED
    # Guard clause: a previous call already resolved the flag.
    if CUSTOM_OP_ENABLED is not None:
        return CUSTOM_OP_ENABLED
    try:
        # register custom ops into torch_library here
        import vllm_ascend.vllm_ascend_C  # type: ignore # noqa: F401
    except ImportError:
        CUSTOM_OP_ENABLED = False
        logger.warning(
            "Warning: Failed to register custom ops, all custom ops will be disabled"
        )
    else:
        CUSTOM_OP_ENABLED = True
    return CUSTOM_OP_ENABLED
def find_hccl_library() -> str:
"""
We either use the library file specified by the `HCCL_SO_PATH`