[CI] Enable linux-aarch64-a2 (64GB) and tp2 * 2 max-parallel to speed up CI (#2065)
### What this PR does / why we need it?
Currently our workflow run time takes about 3 hours in total, which
seriously affects the developer experience, so an optimization is
urgently needed. After this PR, the running time of the full CI is
expected to be shortened to about 1h40min.
- Enable linux-aarch64-a2 (64GB) to replace linux-arm64-npu (32GB)
- Change TP4 to TP2 with max-parallel: 2
- Move DeepSeek-V2-Lite-W8A8 to single card test
### Does this PR introduce _any_ user-facing change?
No
- vLLM version: v0.10.0
- vLLM main:
a2480251ec
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
4
.github/workflows/accuracy_test.yaml
vendored
4
.github/workflows/accuracy_test.yaml
vendored
@@ -85,8 +85,8 @@ jobs:
|
||||
}}
|
||||
runs-on: >-
|
||||
${{
|
||||
(matrix.model_name == 'Qwen/Qwen3-30B-A3B' && 'linux-arm64-npu-4') ||
|
||||
'linux-arm64-npu-2'
|
||||
(matrix.model_name == 'Qwen/Qwen3-30B-A3B' && 'linux-aarch64-a2-2') ||
|
||||
'linux-aarch64-a2-1'
|
||||
}}
|
||||
strategy:
|
||||
matrix:
|
||||
|
||||
2
.github/workflows/vllm_ascend_doctest.yaml
vendored
2
.github/workflows/vllm_ascend_doctest.yaml
vendored
@@ -48,7 +48,7 @@ jobs:
|
||||
matrix:
|
||||
vllm_verison: [v0.9.1-dev, v0.9.1-dev-openeuler, main, main-openeuler]
|
||||
name: vLLM Ascend test
|
||||
runs-on: linux-arm64-npu-1
|
||||
runs-on: linux-aarch64-a2-1
|
||||
container:
|
||||
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:${{ matrix.vllm_verison }}
|
||||
steps:
|
||||
|
||||
7
.github/workflows/vllm_ascend_test.yaml
vendored
7
.github/workflows/vllm_ascend_test.yaml
vendored
@@ -136,7 +136,7 @@ jobs:
|
||||
strategy:
|
||||
max-parallel: 2
|
||||
matrix:
|
||||
os: [linux-arm64-npu-1]
|
||||
os: [linux-aarch64-a2-1]
|
||||
vllm_version: [main, v0.10.0]
|
||||
name: singlecard e2e test
|
||||
runs-on: ${{ matrix.os }}
|
||||
@@ -213,9 +213,9 @@ jobs:
|
||||
needs: [e2e]
|
||||
if: ${{ needs.e2e.result == 'success' }}
|
||||
strategy:
|
||||
max-parallel: 1
|
||||
max-parallel: 2
|
||||
matrix:
|
||||
os: [linux-arm64-npu-4]
|
||||
os: [linux-aarch64-a2-2]
|
||||
vllm_version: [main, v0.10.0]
|
||||
name: multicard e2e test
|
||||
runs-on: ${{ matrix.os }}
|
||||
@@ -275,7 +275,6 @@ jobs:
|
||||
# To avoid oom, we need to run the test in a single process.
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_dbo
|
||||
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeekV3_dbo
|
||||
pytest -sv tests/e2e/multicard/test_data_parallel.py
|
||||
|
||||
@@ -42,7 +42,7 @@ jobs:
|
||||
strategy:
|
||||
max-parallel: 2
|
||||
matrix:
|
||||
os: [linux-arm64-npu-1, linux-arm64-npu-4]
|
||||
os: [linux-aarch64-a2-1, linux-aarch64-a2-2]
|
||||
vllm_version: [main, v0.10.0]
|
||||
name: vLLM Ascend long term test
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
Reference in New Issue
Block a user