diff --git a/tests/e2e/multicard/4-cards/long_sequence/test_basic.py b/tests/e2e/multicard/4-cards/long_sequence/test_basic.py
index 40f5f700..70317003 100644
--- a/tests/e2e/multicard/4-cards/long_sequence/test_basic.py
+++ b/tests/e2e/multicard/4-cards/long_sequence/test_basic.py
@@ -236,7 +236,8 @@ def test_dcp_basic():
             decode_context_parallel_size=2,
             max_num_batched_tokens=1024,
             enable_expert_parallel=True,
-            block_size=128) as runner:
+            block_size=128,
+            compilation_config={"pass_config": {"enable_sp": True}}) as runner:
         runner.model.generate(prompts, sampling_params)
 
 
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
index 36546b26..7d9710f1 100644
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -1284,7 +1284,7 @@ class NPUModelRunner(GPUModelRunner):
         if (
             cudagraph_mode == CUDAGraphMode.FULL
             or (enable_sp() and not self.model_config.use_mla)
-            and self.pcp_size == 1  # TODO(lxs): fix this
+            and self.pcp_size * self.dcp_size == 1
         ):
             # Currently, Graph Mode and SP will both pad num_tokens,
             # Another possible condition is num_tokens_padded != num_tokens_unpadded