diff --git a/tests/e2e/multicard/test_external_launcher.py b/tests/e2e/multicard/test_external_launcher.py
index d5441691..05851db1 100644
--- a/tests/e2e/multicard/test_external_launcher.py
+++ b/tests/e2e/multicard/test_external_launcher.py
@@ -148,8 +148,6 @@ def test_external_launcher_and_sleepmode():
 
     print(output)
 
-    assert "TP RANKS: [0]" in output
-    assert "TP RANKS: [1]" in output
     assert "Generated text:" in output
     assert "Sleep and wake up successfully!!" in output
     assert proc.returncode == 0
@@ -198,8 +196,6 @@ def test_external_launcher_and_sleepmode_level2():
 
     print(output)
 
-    assert "TP RANKS: [0]" in output
-    assert "TP RANKS: [1]" in output
     assert "Generated text:" in output
     assert "Sleep and wake up successfully!!" in output
     assert proc.returncode == 0
diff --git a/tests/e2e/singlecard/test_aclgraph.py b/tests/e2e/singlecard/test_aclgraph.py
index efa6cb39..86ce4795 100644
--- a/tests/e2e/singlecard/test_aclgraph.py
+++ b/tests/e2e/singlecard/test_aclgraph.py
@@ -100,8 +100,6 @@ def test_models_with_aclgraph(
     )
 
 
-@pytest.mark.skip("Skipping this test for now, "
-                  "it fails intermittently and needs investigation.")
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [5])
 def test_models_with_aclgraph_full_decode_only(
@@ -172,7 +170,10 @@ def test_models_with_aclgraph_full_decode_only(
                 model,
                 max_model_len=1024,
                 enforce_eager=False,
-                compilation_config={"cudagraph_mode": "FULL_DECODE_ONLY"},
+                compilation_config={
+                    "cudagraph_capture_sizes": [4, 8, 32, 64],
+                    "cudagraph_mode": "FULL_DECODE_ONLY"
+                },
         ) as runner:
             vllm_aclgraph_outputs = runner.model.generate(
                 prompts, sampling_params)
@@ -180,7 +181,9 @@ def test_models_with_aclgraph_full_decode_only(
         with VllmRunner(
                 model,
                 max_model_len=1024,
-                enforce_eager=True,
+                compilation_config={
+                    "cudagraph_capture_sizes": [4, 8, 32, 64],
+                },
         ) as runner:
             vllm_eager_outputs = runner.model.generate(prompts,
                                                        sampling_params)
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
index 167345bf..7f8fe1e1 100644
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -108,8 +108,7 @@ from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.ascend_forward_context import (MoECommType,
                                                 set_ascend_forward_context)
 from vllm_ascend.attention.attention_mask import AttentionMaskBuilder
-from vllm_ascend.attention.attention_v1 import (AscendAttentionMetadataBuilder,
-                                                AscendAttentionState)
+from vllm_ascend.attention.attention_v1 import AscendAttentionState
 from vllm_ascend.attention.utils import (AscendCommonAttentionMetadata,
                                          AscendPrefillContextParallelMetadata)
 # yapf: disable
@@ -2887,12 +2886,12 @@ class NPUModelRunner(LoRAModelRunnerMixin):
 
                 for attn_group in self.attn_groups[kv_cache_group_id]:
                     builder = attn_group.get_metadata_builder()
-                    if isinstance(builder, AscendAttentionMetadataBuilder):
-                        attn_metadata_full_attention = builder.build_for_graph_capture(
-                            common_attn_metadata, attn_state, self.get_model())
-                    elif isinstance(builder, GDNAttentionMetadataBuilder):
+                    if isinstance(builder, GDNAttentionMetadataBuilder):
                         attn_metadata_gdn_attention = builder.build_for_cudagraph_capture(
                             common_metadata)
+                    else:
+                        attn_metadata_full_attention = builder.build_for_graph_capture(
+                            common_attn_metadata, attn_state, self.get_model())
                     for layer_name in kv_cache_group_spec.layer_names:
                         if "linear_attn" in layer_name:
                             attn_metadata[