[CI] Add wait logic for each individual case (#6036)

### What this PR does / why we need it?
Wait until the NPU memory is free before each individual test case runs.
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.13.0
- vLLM main:
2c24bc6996

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
Signed-off-by: leo-pony <nengjunma@outlook.com>
Co-authored-by: leo-pony <nengjunma@outlook.com>
This commit is contained in:
Li Wang
2026-01-20 21:05:44 +08:00
committed by GitHub
parent 750c06c78a
commit 8cf1e8d8a7
3 changed files with 84 additions and 3 deletions

View File

@@ -26,6 +26,7 @@ import torch
from vllm.utils.network_utils import get_open_port
from vllm_ascend.utils import AscendDeviceType, get_ascend_device_type
from tests.e2e.conftest import wait_until_npu_memory_free
MODELS = [
# Offline data parallel mode will be not supported/useful for dense models
@@ -137,6 +138,7 @@ def _run_worker_process(
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("max_tokens", [4, 36])
@patch.dict(os.environ, {"ASCEND_RT_VISIBLE_DEVICES": "0,1"})
@wait_until_npu_memory_free(target_free_percentage=0.6)
def test_models_aclgraph_capture_replay_metrics_dp2(
model: str,
max_tokens: int,