From 1f71da80eba7cefe4b4a9f459d00e5409678bbb7 Mon Sep 17 00:00:00 2001 From: Li Wang Date: Fri, 13 Mar 2026 08:52:56 +0800 Subject: [PATCH] [CI] Fix server start failure when long weight loading (#7098) ### What this PR does / why we need it? When loading large models (e.g., 163 shards), weight loading can exceed the default 600s timeout. Engine startup then fails with the error: ```shell TimeoutError: Timed out waiting for engines to send initial message on input socket. ``` This PR sets `VLLM_ENGINE_READY_TIMEOUT_S` to 1800 in the nightly single-node e2e workflow so that long weight loading no longer triggers this timeout. It also prepends `/usr/local/lib` to `LD_LIBRARY_PATH` at the start of the two pytest steps. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.16.0 - vLLM main: https://github.com/vllm-project/vllm/commit/4034c3d32e30d01639459edd3ab486f56993876d --------- Signed-off-by: wangli --- .github/workflows/_e2e_nightly_single_node.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/_e2e_nightly_single_node.yaml b/.github/workflows/_e2e_nightly_single_node.yaml index f6ba5cff..5ca64810 100644 --- a/.github/workflows/_e2e_nightly_single_node.yaml +++ b/.github/workflows/_e2e_nightly_single_node.yaml @@ -76,6 +76,7 @@ jobs: UV_INDEX_STRATEGY: unsafe-best-match UV_NO_CACHE: 1 UV_SYSTEM_PYTHON: 1 + VLLM_ENGINE_READY_TIMEOUT_S: 1800 steps: - name: Check npu and CANN info run: | @@ -204,6 +205,7 @@ jobs: VLLM_CI_RUNNER: ${{ inputs.runner }} working-directory: /vllm-workspace/vllm-ascend run: | + export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH echo "Running pytest with tests path: ${{ inputs.tests }}" pytest -sv "${{ inputs.tests }}" \ --ignore=tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py @@ -217,6 +219,7 @@ jobs: CONFIG_YAML_PATH: ${{ inputs.config_file_path }} working-directory: /vllm-workspace/vllm-ascend run: | + export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib" >> ~/.bashrc echo "Running YAML-driven test with config: ${{ inputs.config_file_path }}" pytest -sv 
tests/e2e/nightly/single_node/models/scripts/test_single_node.py