diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml index 080f887..9007a85 100644 --- a/.github/workflows/_e2e_test.yaml +++ b/.github/workflows/_e2e_test.yaml @@ -177,8 +177,7 @@ jobs: run: | pytest -sv tests/e2e/multicard/test_data_parallel.py pytest -sv tests/e2e/multicard/test_expert_parallel.py - # FixMe - #pytest -sv tests/e2e/multicard/test_external_launcher.py + pytest -sv tests/e2e/multicard/test_external_launcher.py pytest -sv tests/e2e/multicard/test_single_request_aclgraph.py pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py diff --git a/tests/e2e/multicard/test_external_launcher.py b/tests/e2e/multicard/test_external_launcher.py index 9bf855e..d544169 100644 --- a/tests/e2e/multicard/test_external_launcher.py +++ b/tests/e2e/multicard/test_external_launcher.py @@ -108,6 +108,7 @@ def test_moe_external_launcher(model): assert proc.returncode == 0 +@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"}) def test_external_launcher_and_sleepmode(): script = Path( __file__ @@ -154,6 +155,7 @@ def test_external_launcher_and_sleepmode(): assert proc.returncode == 0 +@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"}) def test_external_launcher_and_sleepmode_level2(): script = Path( __file__ diff --git a/tests/e2e/singlecard/test_camem.py b/tests/e2e/singlecard/test_camem.py index 3f1f92b..1cb3fc1 100644 --- a/tests/e2e/singlecard/test_camem.py +++ b/tests/e2e/singlecard/test_camem.py @@ -18,6 +18,8 @@ # import gc +import os +from unittest.mock import patch import torch from vllm import SamplingParams @@ -66,6 +68,7 @@ def test_basic_camem(): @fork_new_process_for_each_test +@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"}) def test_end_to_end(): free, total = torch.npu.mem_get_info() used_bytes_baseline = total - free # in case other process is running diff --git a/tests/ut/worker/test_worker_v1.py b/tests/ut/worker/test_worker_v1.py index 2313e71..2f9f166 100644 --- a/tests/ut/worker/test_worker_v1.py +++ b/tests/ut/worker/test_worker_v1.py @@ -1,3 +1,4 @@ +import os import unittest from unittest.mock import MagicMock, patch @@ -246,6 +247,7 @@ class TestNPUWorker(TestBase): @patch("vllm_ascend.worker.worker_v1.sleep_mode_enabled") @patch("vllm_ascend.worker.worker_v1.CaMemAllocator") + @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"}) def test_wake_up_mode_enabled(self, mock_allocator_class, mock_sleep_mode_enabled): """Test wake_up method when sleep mode is enabled""" @@ -268,6 +270,7 @@ class TestNPUWorker(TestBase): mock_allocator.wake_up.assert_called_once_with(tags=["test_tag"]) @patch("vllm_ascend.worker.worker_v1.sleep_mode_enabled") + @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"}) def test_wake_up_mode_disabled_raises_error(self, mock_sleep_mode_enabled): """Test wake_up method raises exception when sleep mode is disabled""" from vllm_ascend.worker.worker_v1 import NPUWorker diff --git a/vllm_ascend/worker/worker_v1.py b/vllm_ascend/worker/worker_v1.py index f14823f..3f6db84 100644 --- a/vllm_ascend/worker/worker_v1.py +++ b/vllm_ascend/worker/worker_v1.py @@ -48,7 +48,7 @@ from vllm_ascend.cpu_binding import bind_cpus from vllm_ascend.device_allocator.camem import CaMemAllocator from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel from vllm_ascend.platform import NPUPlatform -from vllm_ascend.utils import (init_ascend_soc_version, +from vllm_ascend.utils import (init_ascend_soc_version, is_enable_nz, register_ascend_customop, sleep_mode_enabled, try_register_lib) from vllm_ascend.worker.model_runner_v1 import NPUModelRunner @@ -178,6 +178,11 @@ class NPUWorker(WorkerBase): raise ValueError( "Sleep mode is not enabled. Please compile vllm-ascend with COMPILE_CUSTOM_KERNELS=1." ) + + if is_enable_nz(): + raise ValueError( + "FRACTAL_NZ mode is enabled. This may cause model parameter precision issues " + "in the RL scenarios. Please set VLLM_ASCEND_ENABLE_NZ=0.") allocator = CaMemAllocator.get_instance() allocator.wake_up(tags=tags)