From 243ab7d720cfb3bc25b1e71689b0834b8f5afd01 Mon Sep 17 00:00:00 2001
From: Li Wang <wangli858794774@gmail.com>
Date: Fri, 19 Dec 2025 21:21:42 +0800
Subject: [PATCH] [CI] Use offline mode for nightly test (#5187)

### What this PR does / why we need it?
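In the single-node nightly tests, access to ModelScope has no retry
mechanism, so model downloads sometimes fail with an HTTP 400 error.
Use the local offline cache for these tests instead.

This patch also drops the `synchronize` trigger type from the PR image
build workflow and raises the default `max_wait_seconds` in
`RemoteOpenAIServer` from 1800 to 2800.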

- vLLM version: v0.12.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
---
 .github/workflows/_e2e_nightly_single_node.yaml    | 3 +++
 .github/workflows/pr_tag_image_build_and_push.yaml | 2 +-
 tests/e2e/conftest.py                              | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/_e2e_nightly_single_node.yaml b/.github/workflows/_e2e_nightly_single_node.yaml
index df91260f..bff4f0e2 100644
--- a/.github/workflows/_e2e_nightly_single_node.yaml
+++ b/.github/workflows/_e2e_nightly_single_node.yaml
@@ -57,6 +57,9 @@ jobs:
     timeout-minutes: 600
     container:
       image: ${{ inputs.image }}
+    env:
+      TRANSFORMERS_OFFLINE: 1
+      VLLM_USE_MODELSCOPE: True
     steps:
       - name: Check npu and CANN info
         run: |
diff --git a/.github/workflows/pr_tag_image_build_and_push.yaml b/.github/workflows/pr_tag_image_build_and_push.yaml
index 4991ae04..a417fb2c 100644
--- a/.github/workflows/pr_tag_image_build_and_push.yaml
+++ b/.github/workflows/pr_tag_image_build_and_push.yaml
@@ -26,7 +26,7 @@ on:
       - 'cmake/**'
       - 'CMakeLists.txt'
       - 'csrc/**'
-    types: [ labeled, synchronize ]
+    types: [ labeled ]
   push:
     # Publish image when tagging, the Dockerfile in tag will be build as tag image
     branches:
diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
index a8207e3f..b59ab7ce 100644
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -162,7 +162,7 @@ class RemoteOpenAIServer:
         self.proxy_port = proxy_port
 
         self._start_server(model, vllm_serve_args, env_dict)
-        max_wait_seconds = max_wait_seconds or 1800
+        max_wait_seconds = max_wait_seconds or 2800
         if self.disaggregated_prefill:
             assert proxy_port is not None, "for disaggregated_prefill, proxy port must be provided"
             self._wait_for_server_pd(timeout=max_wait_seconds)