From bc5ca2c85674b90828c3c8d86f18f0b9c9cfeb4b Mon Sep 17 00:00:00 2001 From: zhangxinyuehfad <59153331+zhangxinyuehfad@users.noreply.github.com> Date: Wed, 29 Apr 2026 14:31:12 +0800 Subject: [PATCH] [0.18.0][Bugfix] Restore VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT to original value for nightly test (#8794) ### What this PR does / why we need it? PR #8618 renamed `VLLM_NIXL_ABORT_REQUEST_TIMEOUT` to `VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT` and simultaneously reduced the timeout value from 300000 to 480 seconds in the nightly test configs. The 480s value is far too short for heavy multi-node workloads (DeepSeek V3/R1 under W8A8 + EP), causing [spurious abort-request timeouts](https://github.com/vllm-project/vllm-ascend/actions/runs/25067539406/job/73441223206) in CI. This PR restores the timeout value to the original 300000 to fix the nightly test failures introduced by #8618. Signed-off-by: hfadzxy --- .../e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml | 2 +- .../multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml | 2 +- tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml b/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml index 0878245e..ca6681fa 100644 --- a/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml +++ b/tests/e2e/nightly/multi_node/config/DeepSeek-R1-W8A8-longseq.yaml @@ -13,7 +13,7 @@ env_common: HCCL_DETERMINISTIC: True TASK_QUEUE_ENABLE: 1 HCCL_OP_RETRY_ENABLE: "L0:0, L1:0" - VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 480 + VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 300000 disaggregated_prefill: enabled: true diff --git a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml index b3d47ca0..147e87f0 100644 --- a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml +++ b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP-aime2025.yaml @@ -15,7 +15,7 @@ env_common: ASCEND_TRANSPORT_PRINT: 1 ACL_OP_INIT_MODE: 1 ASCEND_A3_ENABLE: 1 - VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 480 + VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 300000 VLLM_ENGINE_READY_TIMEOUT_S: 1800 HCCL_CONNECT_TIMEOUT: 1200 HCCL_INTRA_PCIE_ENABLE: 1 diff --git a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml index 9c46bb56..3d3eeb64 100644 --- a/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml +++ b/tests/e2e/nightly/multi_node/config/DeepSeek-V3_2-W8A8-EP.yaml @@ -15,7 +15,7 @@ env_common: ASCEND_TRANSPORT_PRINT: 1 ACL_OP_INIT_MODE: 1 ASCEND_A3_ENABLE: 1 - VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 480 + VLLM_MOONCAKE_ABORT_REQUEST_TIMEOUT: 300000 VLLM_ENGINE_READY_TIMEOUT_S: 1800 HCCL_CONNECT_TIMEOUT: 1200 HCCL_INTRA_PCIE_ENABLE: 1