diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py index 711fbbd1..3527b910 100644 --- a/vllm_ascend/utils.py +++ b/vllm_ascend/utils.py @@ -491,6 +491,9 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None: # TODO: Find out whether we need to solve allreduce function MAX_CAPTURE_SIZE = 1800 + # Enabling pcp or dcp adds extra communication and consumes approximately up to 100 additional streams + CP_ADDITIONAL_STREAM_NUM = 100 + # Store original configuration and temporarily clear it compilation_config = vllm_config.compilation_config original_sizes, compilation_config.cudagraph_capture_sizes = \ @@ -547,6 +550,12 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None: "Calculated maximum supported batch sizes for ACL graph: %s", max_num_batch_sizes) else: + # Enabling pcp or dcp adds extra communication and consumes approximately up to 100 additional streams + if parallel_config.prefill_context_parallel_size > 1: + MAX_CAPTURE_SIZE = MAX_CAPTURE_SIZE - CP_ADDITIONAL_STREAM_NUM + if parallel_config.decode_context_parallel_size > 1: + MAX_CAPTURE_SIZE = MAX_CAPTURE_SIZE - CP_ADDITIONAL_STREAM_NUM + # The above describes an empirical formula applicable to the A2 hardware. # Under this configuration, HCCL employs the FFTS+ method for execution unfolding, # which adds only 1 concurrent stream without consuming collective communication execution unfolding streams.