diff --git a/tests/e2e/nightly/single_node/models/configs/DeepSeek-V3.2-W8A8.yaml b/tests/e2e/nightly/single_node/models/configs/DeepSeek-V3.2-W8A8.yaml
index b2374079..1ab0b3ea 100644
--- a/tests/e2e/nightly/single_node/models/configs/DeepSeek-V3.2-W8A8.yaml
+++ b/tests/e2e/nightly/single_node/models/configs/DeepSeek-V3.2-W8A8.yaml
@@ -9,7 +9,7 @@ test_cases:
       HCCL_OP_EXPANSION_MODE: "AIV"
       OMP_PROC_BIND: "false"
       OMP_NUM_THREADS: "1"
-      HCCL_BUFFSIZE: "1024"
+      HCCL_BUFFSIZE: "256"
       VLLM_ASCEND_ENABLE_MLAPO: "1"
       PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
       VLLM_ASCEND_ENABLE_FLASHCOMM1: "1"
@@ -28,14 +28,14 @@ test_cases:
       - "--max-num-batched-tokens"
       - "8192"
       - "--max-num-seqs"
-      - "4"
+      - "8"
       - "--trust-remote-code"
       - "--quantization"
       - "ascend"
       - "--gpu-memory-utilization"
-      - "0.98"
+      - "0.93"
       - "--compilation-config"
-      - '{"cudagraph_capture_sizes":[8, 16, 24, 32, 40, 48], "cudagraph_mode":"FULL_DECODE_ONLY"}'
+      - '{"cudagraph_capture_sizes":[4, 8, 16, 20, 24, 28, 32], "cudagraph_mode":"FULL_DECODE_ONLY"}'
       - "--speculative-config"
       - '{"num_speculative_tokens": 3, "method":"deepseek_mtp"}'
       - "--additional-config"
@@ -63,16 +63,16 @@ test_cases:
       max_out_len: 1500
       batch_size: 1
       request_rate: 11.2
-      baseline: 134
+      baseline: 1
       threshold: 0.97
     perf_2:
       case_type: performance
       dataset_path: vllm-ascend/GSM8K-in3500-bs400
       request_conf: vllm_api_stream_chat
       dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_str_perf
-      num_prompts: 100
+      num_prompts: 128
       max_out_len: 1500
-      batch_size: 4
+      batch_size: 32
       request_rate: 11.2
-      baseline: 134
+      baseline: 210
       threshold: 0.97
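
The new `cudagraph_capture_sizes` appear sized to the worst-case decode token batch under MTP speculative decoding. A minimal sketch of that arithmetic, assuming vLLM pads each full-decode step to `num_seqs * (1 + num_speculative_tokens)` scheduled tokens (an assumption about the padding rule, not stated in this patch; the values themselves are taken from the diff above):

```python
# Sizing check for the updated config (hypothetical rationale, values from the diff).
max_num_seqs = 8            # new --max-num-seqs
num_speculative_tokens = 3  # from --speculative-config (deepseek_mtp)

# Assumed worst case: every sequence contributes 1 main token + k MTP tokens
# per decode step, so the largest captured graph must cover num_seqs * (1 + k).
max_decode_tokens = max_num_seqs * (1 + num_speculative_tokens)

capture_sizes = [4, 8, 16, 20, 24, 28, 32]  # new cudagraph_capture_sizes
assert max_decode_tokens == max(capture_sizes)  # 8 * 4 == 32
print(f"worst-case decode batch: {max_decode_tokens} tokens")
```

Under the same assumption, the previous pairing (`--max-num-seqs 4` with sizes up to 48) captured graphs beyond the reachable batch of 16 tokens, which wastes capture memory; shrinking the capture range is also consistent with the smaller `HCCL_BUFFSIZE` and lower `--gpu-memory-utilization` in this change.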