From a6745b857714f74ecea940971e0a6f9f673967aa Mon Sep 17 00:00:00 2001
From: zhangxinyuehfad <59153331+zhangxinyuehfad@users.noreply.github.com>
Date: Thu, 5 Mar 2026 16:43:45 +0800
Subject: [PATCH] [CI] fix test_qwen3_moe_external_launcher_ep_tp2 (#6951)

### What this PR does / why we need it?
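Fix `test_qwen3_moe_external_launcher_ep_tp2` by decorating it with
`@wait_until_npu_memory_free()`. The same decorator is also added to the
Ascend910B-only external-launcher test that runs with
`VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE=1`.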

### Does this PR introduce _any_ user-facing change?
No. Only the e2e test file `test_external_launcher.py` is modified.

### How was this patch tested?

- vLLM version: v0.16.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/15d76f74e2fdb12a95ea00f0ca283acf6219a2b7

Signed-off-by: hfadzxy <starmoon_zhang@163.com>
---
 tests/e2e/multicard/2-cards/test_external_launcher.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/e2e/multicard/2-cards/test_external_launcher.py b/tests/e2e/multicard/2-cards/test_external_launcher.py
index ef2da648..ff398232 100644
--- a/tests/e2e/multicard/2-cards/test_external_launcher.py
+++ b/tests/e2e/multicard/2-cards/test_external_launcher.py
@@ -79,6 +79,7 @@ def test_qwen3_external_launcher(model):
 
 
 @pytest.mark.parametrize("model", MOE_MODELS)
+@wait_until_npu_memory_free()
 def test_qwen3_moe_external_launcher_ep_tp2(model):
     script = Path(
         __file__
@@ -208,6 +209,7 @@ def test_qwen3_external_launcher_with_sleepmode_level2():
     reason="This test is only for Ascend910B devices.",
 )
 @pytest.mark.parametrize("model", MODELS)
+@wait_until_npu_memory_free()
 @patch.dict(os.environ, {
     "VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE": "1",
     "HCCL_BUFFSIZE": "500"