Fix the device error when using ray as vllm-ascend backend (#884)

1. Remove RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES. 2. Add lazy init for vllm_ascend_C. Signed-off-by: zhuo97 <1103045176@qq.com>
2025-06-16 21:03:16 +08:00
parent 69b817ed65
commit f5404dc650
7 changed files with 40 additions and 20 deletions
--- a/vllm_ascend/attention/attention.py
+++ b/vllm_ascend/attention/attention.py
@@ -36,7 +36,7 @@ from vllm.utils import async_tensor_h2d, make_tensor_with_pad

 from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.ops.cache import concat_and_cache_mla
-from vllm_ascend.platform import CUSTOM_OP_ENABLED
+from vllm_ascend.utils import enable_custom_op
 from vllm_ascend.worker.model_runner import (
    ModelInputForNPUBuilder, ModelInputForNPUWithSamplingMetadata)

@@ -462,7 +462,7 @@ class AscendMetadata(AttentionMetadata):
        for i in range(num_queries):
            self.seq_lens[i] += 1
        self.max_decode_seq_len = max(self.seq_lens)
-        if CUSTOM_OP_ENABLED:
+        if enable_custom_op():
            #advance a step on NPU for existing inputs for a multi-step runner if custom ops is enabled
            torch.ops._C.advance_step_flashattn_ascendc(
                num_seqs=num_seqs,