From 55e37f50418f38861768556fdde9e86c2a22aef4 Mon Sep 17 00:00:00 2001 From: wangx700 Date: Sat, 8 Nov 2025 14:11:15 +0800 Subject: [PATCH] [v0.11.0][Bugfix] fix sleepmode level2 e2e test (#4023) ### What this PR does / why we need it? Fix the sleep mode level 2 e2e test: re-enable tests/e2e/multicard/test_external_launcher.py in the e2e workflow, run the affected sleep mode and wake-up tests with VLLM_ASCEND_ENABLE_NZ=0, and make the NPUWorker wake-up path raise a ValueError when FRACTAL_NZ mode is enabled, because it may cause model parameter precision issues in RL scenarios. ### Does this PR introduce _any_ user-facing change? No API change. Waking up a worker now raises a ValueError when FRACTAL_NZ mode is enabled; set VLLM_ASCEND_ENABLE_NZ=0 to avoid it. ### How was this patch tested? Covered by the e2e and unit tests updated in this patch. Signed-off-by: wangx700 --- .github/workflows/_e2e_test.yaml | 3 +-- tests/e2e/multicard/test_external_launcher.py | 2 ++ tests/e2e/singlecard/test_camem.py | 3 +++ tests/ut/worker/test_worker_v1.py | 3 +++ vllm_ascend/worker/worker_v1.py | 7 ++++++- 5 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml index 080f887..9007a85 100644 --- a/.github/workflows/_e2e_test.yaml +++ b/.github/workflows/_e2e_test.yaml @@ -177,8 +177,7 @@ jobs: run: | pytest -sv tests/e2e/multicard/test_data_parallel.py pytest -sv tests/e2e/multicard/test_expert_parallel.py - # FixMe - #pytest -sv tests/e2e/multicard/test_external_launcher.py + pytest -sv tests/e2e/multicard/test_external_launcher.py pytest -sv tests/e2e/multicard/test_single_request_aclgraph.py pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py diff --git a/tests/e2e/multicard/test_external_launcher.py b/tests/e2e/multicard/test_external_launcher.py index 9bf855e..d544169 100644 --- a/tests/e2e/multicard/test_external_launcher.py +++ b/tests/e2e/multicard/test_external_launcher.py @@ -108,6 +108,7 @@ def test_moe_external_launcher(model): assert proc.returncode == 0 +@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"}) def test_external_launcher_and_sleepmode(): script = Path( __file__ @@ -154,6 +155,7 @@ def test_external_launcher_and_sleepmode(): assert proc.returncode == 0 +@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"}) def test_external_launcher_and_sleepmode_level2(): script = Path( __file__ diff --git 
a/tests/e2e/singlecard/test_camem.py b/tests/e2e/singlecard/test_camem.py index 3f1f92b..1cb3fc1 100644 --- a/tests/e2e/singlecard/test_camem.py +++ b/tests/e2e/singlecard/test_camem.py @@ -18,6 +18,8 @@ # import gc +import os +from unittest.mock import patch import torch from vllm import SamplingParams @@ -66,6 +68,7 @@ def test_basic_camem(): @fork_new_process_for_each_test +@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"}) def test_end_to_end(): free, total = torch.npu.mem_get_info() used_bytes_baseline = total - free # in case other process is running diff --git a/tests/ut/worker/test_worker_v1.py b/tests/ut/worker/test_worker_v1.py index 2313e71..2f9f166 100644 --- a/tests/ut/worker/test_worker_v1.py +++ b/tests/ut/worker/test_worker_v1.py @@ -1,3 +1,4 @@ +import os import unittest from unittest.mock import MagicMock, patch @@ -246,6 +247,7 @@ class TestNPUWorker(TestBase): @patch("vllm_ascend.worker.worker_v1.sleep_mode_enabled") @patch("vllm_ascend.worker.worker_v1.CaMemAllocator") + @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"}) def test_wake_up_mode_enabled(self, mock_allocator_class, mock_sleep_mode_enabled): """Test wake_up method when sleep mode is enabled""" @@ -268,6 +270,7 @@ class TestNPUWorker(TestBase): mock_allocator.wake_up.assert_called_once_with(tags=["test_tag"]) @patch("vllm_ascend.worker.worker_v1.sleep_mode_enabled") + @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"}) def test_wake_up_mode_disabled_raises_error(self, mock_sleep_mode_enabled): """Test wake_up method raises exception when sleep mode is disabled""" from vllm_ascend.worker.worker_v1 import NPUWorker diff --git a/vllm_ascend/worker/worker_v1.py b/vllm_ascend/worker/worker_v1.py index f14823f..3f6db84 100644 --- a/vllm_ascend/worker/worker_v1.py +++ b/vllm_ascend/worker/worker_v1.py @@ -48,7 +48,7 @@ from vllm_ascend.cpu_binding import bind_cpus from vllm_ascend.device_allocator.camem import CaMemAllocator from vllm_ascend.distributed.parallel_state import 
init_ascend_model_parallel from vllm_ascend.platform import NPUPlatform -from vllm_ascend.utils import (init_ascend_soc_version, +from vllm_ascend.utils import (init_ascend_soc_version, is_enable_nz, register_ascend_customop, sleep_mode_enabled, try_register_lib) from vllm_ascend.worker.model_runner_v1 import NPUModelRunner @@ -178,6 +178,11 @@ class NPUWorker(WorkerBase): raise ValueError( "Sleep mode is not enabled. Please compile vllm-ascend with COMPILE_CUSTOM_KERNELS=1." ) + + if is_enable_nz(): + raise ValueError( + "FRACTAL_NZ mode is enabled. This may cause model parameter precision issues " + "in the RL scenarios. Please set VLLM_ASCEND_ENABLE_NZ=0.") allocator = CaMemAllocator.get_instance() allocator.wake_up(tags=tags)