[Bugfix] fix sleepmode level2 e2e test (#4019)
### What this PR does / why we need it?
Enable the sleep mode level 2 e2e test, and add a check that raises an error
when FRACTAL_NZ is enabled (i.e. `VLLM_ASCEND_ENABLE_NZ` is not set to 0).
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Tested with the e2e tests.
- vLLM version: v0.11.0
- vLLM main:
83f478bb19
Signed-off-by: wangx700 <wangxin700@huawei.com>
This commit is contained in:
2
.github/workflows/_e2e_test.yaml
vendored
2
.github/workflows/_e2e_test.yaml
vendored
@@ -182,7 +182,7 @@ jobs:
|
|||||||
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
|
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
|
||||||
pytest -sv tests/e2e/multicard/test_data_parallel.py
|
pytest -sv tests/e2e/multicard/test_data_parallel.py
|
||||||
pytest -sv tests/e2e/multicard/test_expert_parallel.py
|
pytest -sv tests/e2e/multicard/test_expert_parallel.py
|
||||||
# pytest -sv tests/e2e/multicard/test_external_launcher.py
|
pytest -sv tests/e2e/multicard/test_external_launcher.py
|
||||||
pytest -sv tests/e2e/multicard/test_single_request_aclgraph.py
|
pytest -sv tests/e2e/multicard/test_single_request_aclgraph.py
|
||||||
pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
|
pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
|
||||||
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
|
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
|
||||||
|
|||||||
@@ -108,6 +108,7 @@ def test_moe_external_launcher(model):
|
|||||||
assert proc.returncode == 0
|
assert proc.returncode == 0
|
||||||
|
|
||||||
|
|
||||||
|
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
|
||||||
def test_external_launcher_and_sleepmode():
|
def test_external_launcher_and_sleepmode():
|
||||||
script = Path(
|
script = Path(
|
||||||
__file__
|
__file__
|
||||||
@@ -154,6 +155,7 @@ def test_external_launcher_and_sleepmode():
|
|||||||
assert proc.returncode == 0
|
assert proc.returncode == 0
|
||||||
|
|
||||||
|
|
||||||
|
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
|
||||||
def test_external_launcher_and_sleepmode_level2():
|
def test_external_launcher_and_sleepmode_level2():
|
||||||
script = Path(
|
script = Path(
|
||||||
__file__
|
__file__
|
||||||
|
|||||||
@@ -18,6 +18,8 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
import gc
|
import gc
|
||||||
|
import os
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from vllm import SamplingParams
|
from vllm import SamplingParams
|
||||||
@@ -71,6 +73,7 @@ def test_basic_camem():
|
|||||||
|
|
||||||
|
|
||||||
@fork_new_process_for_each_test
|
@fork_new_process_for_each_test
|
||||||
|
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
|
||||||
def test_end_to_end():
|
def test_end_to_end():
|
||||||
free, total = torch.npu.mem_get_info()
|
free, total = torch.npu.mem_get_info()
|
||||||
used_bytes_baseline = total - free # in case other process is running
|
used_bytes_baseline = total - free # in case other process is running
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import os
|
||||||
import unittest
|
import unittest
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
@@ -273,6 +274,7 @@ class TestNPUWorker(TestBase):
|
|||||||
|
|
||||||
@patch("vllm_ascend.worker.worker_v1.sleep_mode_enabled")
|
@patch("vllm_ascend.worker.worker_v1.sleep_mode_enabled")
|
||||||
@patch("vllm_ascend.worker.worker_v1.CaMemAllocator")
|
@patch("vllm_ascend.worker.worker_v1.CaMemAllocator")
|
||||||
|
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
|
||||||
def test_wake_up_mode_enabled(self, mock_allocator_class,
|
def test_wake_up_mode_enabled(self, mock_allocator_class,
|
||||||
mock_sleep_mode_enabled):
|
mock_sleep_mode_enabled):
|
||||||
"""Test wake_up method when sleep mode is enabled"""
|
"""Test wake_up method when sleep mode is enabled"""
|
||||||
@@ -295,6 +297,7 @@ class TestNPUWorker(TestBase):
|
|||||||
mock_allocator.wake_up.assert_called_once_with(tags=["test_tag"])
|
mock_allocator.wake_up.assert_called_once_with(tags=["test_tag"])
|
||||||
|
|
||||||
@patch("vllm_ascend.worker.worker_v1.sleep_mode_enabled")
|
@patch("vllm_ascend.worker.worker_v1.sleep_mode_enabled")
|
||||||
|
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
|
||||||
def test_wake_up_mode_disabled_raises_error(self, mock_sleep_mode_enabled):
|
def test_wake_up_mode_disabled_raises_error(self, mock_sleep_mode_enabled):
|
||||||
"""Test wake_up method raises exception when sleep mode is disabled"""
|
"""Test wake_up method raises exception when sleep mode is disabled"""
|
||||||
from vllm_ascend.worker.worker_v1 import NPUWorker
|
from vllm_ascend.worker.worker_v1 import NPUWorker
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ from vllm_ascend.cpu_binding import bind_cpus
|
|||||||
from vllm_ascend.device_allocator.camem import CaMemAllocator
|
from vllm_ascend.device_allocator.camem import CaMemAllocator
|
||||||
from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
|
from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
|
||||||
from vllm_ascend.platform import NPUPlatform
|
from vllm_ascend.platform import NPUPlatform
|
||||||
from vllm_ascend.utils import (init_ascend_soc_version,
|
from vllm_ascend.utils import (init_ascend_soc_version, is_enable_nz,
|
||||||
prefill_context_parallel_enable,
|
prefill_context_parallel_enable,
|
||||||
register_ascend_customop, sleep_mode_enabled,
|
register_ascend_customop, sleep_mode_enabled,
|
||||||
try_register_lib, vllm_version_is)
|
try_register_lib, vllm_version_is)
|
||||||
@@ -184,6 +184,11 @@ class NPUWorker(WorkerBase):
|
|||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Sleep mode is not enabled. Please compile vllm-ascend with COMPILE_CUSTOM_KERNELS=1."
|
"Sleep mode is not enabled. Please compile vllm-ascend with COMPILE_CUSTOM_KERNELS=1."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if is_enable_nz():
|
||||||
|
raise ValueError(
|
||||||
|
"FRACTAL_NZ mode is enabled. This may cause model parameter precision issues "
|
||||||
|
"in the RL scenarios. Please set VLLM_ASCEND_ENABLE_NZ=0.")
|
||||||
allocator = CaMemAllocator.get_instance()
|
allocator = CaMemAllocator.get_instance()
|
||||||
allocator.wake_up(tags=tags)
|
allocator.wake_up(tags=tags)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user