diff --git a/tests/e2e/multicard/2-cards/test_external_launcher.py b/tests/e2e/multicard/2-cards/test_external_launcher.py
index dfc4ee75..f8b59e38 100644
--- a/tests/e2e/multicard/2-cards/test_external_launcher.py
+++ b/tests/e2e/multicard/2-cards/test_external_launcher.py
@@ -29,6 +29,7 @@ from unittest.mock import patch
 import pytest
 import torch_npu
 from modelscope import snapshot_download  # type: ignore
+from tests.e2e.conftest import wait_until_npu_memory_free
 
 MODELS = ["Qwen/Qwen3-0.6B"]
 MOE_MODELS = ["Qwen/Qwen3-30B-A3B"]
@@ -110,6 +111,7 @@ def test_qwen3_moe_external_launcher_ep_tp2(model):
 
 
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_NZ": "0"})
+@wait_until_npu_memory_free()
 def test_qwen3_external_launcher_with_sleepmode():
     script = Path(
         __file__
diff --git a/vllm_ascend/attention/context_parallel/mla_cp.py b/vllm_ascend/attention/context_parallel/mla_cp.py
index a53dfb58..de1bc5f3 100644
--- a/vllm_ascend/attention/context_parallel/mla_cp.py
+++ b/vllm_ascend/attention/context_parallel/mla_cp.py
@@ -79,7 +79,7 @@ class AscendMlaCPMetadataBuilder(AscendMLAMetadataBuilder):
         fast_build: bool = False,
     ) -> AscendMLAMetadata:
         metadata_cls = super().build(common_prefix_len, common_attn_metadata)
-        if self.num_prefills == 0 and self.pcp_size > 1:
+        if self.pcp_size > 1:
             self.slot_mapping[: self.num_decode_tokens] = self.slot_mapping[
                 : self.num_decode_tokens * self.pcp_size : self.pcp_size
             ]