[CI] Optimize nightly CI (#3898)
### What this PR does / why we need it?
This patch mainly fixes the problem of not being able to determine the
exit status of the pod's entrypoint script, and also includes some other
small optimizations:
1. Shorten the wait-for-server timeout.
2. Fix a typo.
3. Fix the issue of ais_bench failing to correctly access the proxy URL
in a PD separation scenario.
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.11.0
- vLLM main:
83f478bb19
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -42,6 +42,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
single-node-tests:
|
||||
name: single-node
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -85,6 +86,7 @@ jobs:
|
||||
tests: ${{ matrix.test_config.tests }}
|
||||
|
||||
multi-node-tests:
|
||||
name: multi-node
|
||||
needs: single-node-tests
|
||||
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
|
||||
strategy:
|
||||
@@ -93,19 +95,19 @@ jobs:
|
||||
matrix:
|
||||
test_config:
|
||||
- name: multi-node-deepseek-pd
|
||||
config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml
|
||||
config_file_path: DeepSeek-V3.yaml
|
||||
size: 2
|
||||
- name: multi-node-qwen3-dp
|
||||
config_file_path: tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml
|
||||
config_file_path: Qwen3-235B-A3B.yaml
|
||||
size: 2
|
||||
- name: multi-node-dpsk-4node-pd
|
||||
config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml
|
||||
config_file_path: DeepSeek-R1-W8A8.yaml
|
||||
size: 4
|
||||
- name: multi-node-qwenw8a8-2node
|
||||
config_file_path: tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8.yaml
|
||||
config_file_path: Qwen3-235B-W8A8.yaml
|
||||
size: 2
|
||||
- name: multi-node-glm-2node
|
||||
config_file_path: tests/e2e/nightly/multi_node/config/models/GLM-4_5.yaml
|
||||
config_file_path: GLM-4_5.yaml
|
||||
size: 2
|
||||
uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
|
||||
with:
|
||||
@@ -117,12 +119,3 @@ jobs:
|
||||
config_file_path: ${{ matrix.test_config.config_file_path }}
|
||||
secrets:
|
||||
KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
|
||||
|
||||
clear_resources:
|
||||
needs: multi-node-tests
|
||||
if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
|
||||
uses: ./.github/workflows/_kill_lws_resources.yaml
|
||||
with:
|
||||
runner: linux-aarch64-a3-0
|
||||
secrets:
|
||||
KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
|
||||
|
||||
Reference in New Issue
Block a user