[BugFix][CI]Fix DeepSeek-R1-W8A8-longseq nightly CI (#6297)

### What this PR does / why we need it?
The precision issue arose because the KV cache on the prefill node (p-node) had not been fetched for an extended period (>6 min) and was forcibly freed. To avoid this, the batch size was reduced and the abort timeout was extended.
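The two knobs this fix touches can be summarized in a minimal sketch of the nightly CI config (setting names and values are taken from the diff in this commit; the surrounding structure is abbreviated):

```yaml
env_common:
  # Extend the NIXL abort timeout so the d-node has more time to fetch
  # the p-node's KV cache before it is forcibly freed.
  VLLM_NIXL_ABORT_REQUEST_TIMEOUT: 300000

benchmarks:
  # Smaller benchmark batch size, so queued requests are less likely to
  # leave prefill KV cache unfetched past the timeout.
  batch_size: 16
```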
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.14.1
- vLLM main:
dc917cceb8

Signed-off-by: dsxsteven <dsxsteven@sina.com>
Author: dsxsteven
Date: 2026-01-28 16:36:24 +08:00
Committed by: GitHub
Parent: ac963f1519
Commit: 325cb16e3f


```diff
@@ -13,6 +13,7 @@ env_common:
 HCCL_DETERMINISTIC: True
 TASK_QUEUE_ENABLE: 1
 HCCL_OP_RETRY_ENABLE: "L0:0, L1:0"
+VLLM_NIXL_ABORT_REQUEST_TIMEOUT: 300000
 disaggregated_prefill:
 enabled: true
@@ -34,7 +35,7 @@ deployment:
 --enable-expert-parallel
 --seed 1024
 --quantization ascend
---max-num-seqs 4
+--max-num-seqs 3
 --max-model-len 32768
 --max-num-batched-tokens 16384
 --trust-remote-code
@@ -71,12 +72,12 @@ deployment:
 --enable-expert-parallel
 --seed 1024
 --quantization ascend
---max-num-seqs 4
+--max-num-seqs 8
 --max-model-len 32768
 --max-num-batched-tokens 256
 --trust-remote-code
 --gpu-memory-utilization 0.85
---compilation_config '{"cudagraph_capture_sizes":[4,8,12,16],"cudagraph_mode": "FULL_DECODE_ONLY"}'
+--compilation_config '{"cudagraph_capture_sizes":[4,8,16,32],"cudagraph_mode": "FULL_DECODE_ONLY"}'
 --enable-chunked-prefill
 --speculative-config '{"num_speculative_tokens": 3, "method":"mtp"}'
 --kv-transfer-config
@@ -103,6 +104,6 @@ benchmarks:
 request_conf: vllm_api_general_chat
 dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_chat_prompt
 max_out_len: 24576
-batch_size: 512
+batch_size: 16
 baseline: 95
 threshold: 5
```