diff --git a/tests/ut/test_utils.py b/tests/ut/test_utils.py
index 99f821a..508bb2a 100644
--- a/tests/ut/test_utils.py
+++ b/tests/ut/test_utils.py
@@ -259,7 +259,7 @@ class TestUtils(TestBase):
         utils.update_aclgraph_sizes(test_vllm_config)
         del os.environ['HCCL_OP_EXPANSION_MODE']
         self.assertEqual(
-            147,
+            138,
             len(test_vllm_config.compilation_config.cudagraph_capture_sizes))
 
         test_vllm_config.speculative_config = mock.MagicMock()
@@ -272,7 +272,7 @@ class TestUtils(TestBase):
         utils.update_aclgraph_sizes(test_vllm_config)
         del os.environ['HCCL_OP_EXPANSION_MODE']
         self.assertEqual(
-            120,
+            112,
             len(test_vllm_config.compilation_config.cudagraph_capture_sizes))
 
         # max_num_batch_sizes >= len(original_sizes)
diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py
index a61a5af..33e1e01 100644
--- a/vllm_ascend/utils.py
+++ b/vllm_ascend/utils.py
@@ -40,12 +40,13 @@ if TYPE_CHECKING:
 else:
     VllmConfig = None
 
-# NOTE: Currently, we can only capture 1920 graphs at most,
+# NOTE: Currently, we can only capture 1800 graphs at most,
 # due to the limitation of ACL graph. This number is bounded by
-# the number of streams, which is 2048, we save 128 streams
+# the number of streams, which is 2048, we save 248 streams
 # as a buffer.
 # Maximum number of graphs that can be captured by ACL Graph
-MAX_CAPTURE_SIZE = 1920
+# TODO: Find out whether we need to solve allreduce function
+MAX_CAPTURE_SIZE = 1800
 
 ASCEND_QUANTIZATION_METHOD = "ascend"
 SOC_VERSION_INFERENCE_SERIES = ["Ascend310P3"]
@@ -320,6 +321,8 @@ def update_aclgraph_sizes(vllm_config: VllmConfig) -> None:
         # TODO: Find out whether we need to take into account the pp_size
         parallel_factor = 1 + num_comm_groups + int(
             parallel_config.enable_expert_parallel)
+        if is_moe_model(vllm_config):
+            parallel_factor += (parallel_config.data_parallel_size > 1)
         # Calculate maximum supported batch sizes considering model architecture on the A2 Hardware Device
         # Assume the following case:
         # MAX_CAPTURE_SIZE = 1920, num_hidden_layers = 48, data_parallel_size is 1, tensor_parallel_size is 4,
@@ -583,3 +586,8 @@ def matmul_allreduce_enable() -> bool:
 
 def dense_optim_enable() -> bool:
     return envs_ascend.VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE
+
+
+def is_moe_model(vllm_config: VllmConfig):
+    config = vllm_config.model_config.hf_config
+    return any('experts' in key.lower() for key in config.to_dict())