[MISC] Clean up torch_npu (#688)
torch_npu 2.5.1 now supports autoload. This patch does two things: 1. removes the now-unnecessary explicit `import torch_npu` statements 2. replaces `torch_npu.npu` with `torch.npu`. Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
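For context, a minimal sketch of what the cleanup relies on, assuming torch_npu >= 2.5.1 is installed so its autoload entry point runs on `import torch`: the explicit `import torch_npu  # noqa: F401` lines become redundant, and the device API used in the tests below is reachable as `torch.npu` directly. This snippet is illustrative only and is not part of the patch.

# Hypothetical example, not part of the diff: with torch_npu 2.5.1+
# installed, importing torch is enough; the plugin is auto-loaded and
# registers the NPU backend, so no explicit `import torch_npu` is needed.
import torch

# Same call the tests use after the rename; previously spelled
# torch_npu.npu.mem_get_info(). Returns (free_bytes, total_bytes).
free_bytes, total_bytes = torch.npu.mem_get_info()
print(f"free: {free_bytes} / total: {total_bytes} bytes")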
@@ -20,7 +20,6 @@ import multiprocessing
 import os
 
 import torch
-import torch_npu # noqa: F401
 from vllm.distributed.parallel_state import (get_world_group,
                                              init_distributed_environment)
 from vllm.utils import update_environment_variables
@@ -9,7 +9,6 @@ from typing import Optional, Tuple, Union
 import pytest
 import torch
 import torch.nn as nn
-import torch_npu # noqa: F401
 
 import vllm_ascend.platform # noqa: F401
 
@@ -25,11 +25,6 @@ from vllm.utils import GiB_bytes
 from tests.utils import fork_new_process_for_each_test
 from vllm_ascend.device_allocator.camem import CaMemAllocator
 
-try:
-    import torch_npu # noqa: F401
-except ImportError:
-    print("Failed to import torch_npu.")
-
 
 @fork_new_process_for_each_test
 def test_basic_camem():
@@ -53,9 +48,9 @@ def test_basic_camem():
     output = x + y + z
     assert torch.allclose(output, torch.ones_like(output) * 3)
 
-    free_bytes = torch_npu.npu.mem_get_info()[0]
+    free_bytes = torch.npu.mem_get_info()[0]
     allocator.sleep()
-    free_bytes_after_sleep = torch_npu.npu.mem_get_info()[0]
+    free_bytes_after_sleep = torch.npu.mem_get_info()[0]
     assert free_bytes_after_sleep > free_bytes
     allocator.wake_up()
 
@@ -67,7 +62,7 @@ def test_basic_camem():
 @fork_new_process_for_each_test
 def test_end_to_end():
     os.environ["VLLM_USE_V1"] = "0"
-    free, total = torch_npu.npu.mem_get_info()
+    free, total = torch.npu.mem_get_info()
     used_bytes_baseline = total - free  # in case other process is running
     llm = LLM("Qwen/Qwen2.5-0.5B-Instruct", enable_sleep_mode=True)
     prompt = "How are you?"
@@ -79,7 +74,7 @@ def test_end_to_end():
     # test sleep level 1 here.
     llm.sleep(level=1)
 
-    free_gpu_bytes_after_sleep, total = torch_npu.npu.mem_get_info()
+    free_gpu_bytes_after_sleep, total = torch.npu.mem_get_info()
     used_bytes = total - free_gpu_bytes_after_sleep - used_bytes_baseline
     # now the memory usage should be less than the model weights
     # (0.5B model, 1GiB weights)
@@ -17,7 +17,6 @@
 # limitations under the License.
 #
 import torch
-import torch_npu # noqa: F401
 
 from vllm_ascend.distributed.device_communicators.pyhccl_wrapper import \
     HCCLLibrary