[CI] Add nightly CI test cases for the GLM-5 (#7429)
### What this PR does / why we need it?
Add nightly CI test cases for GLM-5.
Add model download for GLM-5.
https://github.com/vllm-project/vllm-ascend/actions/runs/23286178651/job/67710409642#logs
- vLLM version: v0.17.0
- vLLM main: b31e9326a7
---------
Signed-off-by: liuhaiyang27 <liuhaiyang27@huawei.com>
Signed-off-by: liuhy1213-cell <liuhy1213@gmail.com>
Co-authored-by: liuhaiyang27 <liuhaiyang27@huawei.com>
This commit is contained in:
@@ -3,13 +3,12 @@
|
||||
# ==========================================
|
||||
|
||||
_envs: &envs
|
||||
HCCL_BUFFSIZE: "200"
|
||||
HCCL_BUFFSIZE: "1024"
|
||||
SERVER_PORT: "DEFAULT_PORT"
|
||||
HCCL_OP_EXPANSION_MODE: "AIV"
|
||||
OMP_PROC_BIND: "false"
|
||||
OMP_NUM_THREADS: "1"
|
||||
PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
|
||||
VLLM_ASCEND_BALANCE_SCHEDULING: "1"
|
||||
|
||||
_server_cmd: &server_cmd
|
||||
- "--enable-expert-parallel"
|
||||
@@ -36,6 +35,9 @@ _server_cmd: &server_cmd
|
||||
- "--speculative-config"
|
||||
- '{"num_speculative_tokens": 3, "method": "deepseek_mtp"}'
|
||||
|
||||
_special_dependencies: &special_dependencies
|
||||
transformers: "5.2.0"
|
||||
|
||||
_benchmarks: &benchmarks
|
||||
acc:
|
||||
case_type: accuracy
|
||||
@@ -65,19 +67,13 @@ _benchmarks: &benchmarks
|
||||
test_cases:
|
||||
- name: "GLM-5-TP16-DP1-decodegraph"
|
||||
model: "Eco-Tech/GLM-5-w4a8"
|
||||
special_dependencies: *special_dependencies
|
||||
envs:
|
||||
<<: *envs
|
||||
server_cmd: *server_cmd
|
||||
server_cmd_extra:
|
||||
- "--compilation-config"
|
||||
- '{"cudagraph_capture": [4,8,12,16,20,24,28,32], "cudagraph_model":"FULL_DECODE_ONLY"}'
|
||||
- '{"cudagraph_capture_sizes": [4,8,16,32,64,128,256,512], "cudagraph_mode": "FULL_DECODE_ONLY"}'
|
||||
benchmarks:
|
||||
<<: *benchmarks
|
||||
|
||||
- name: "GLM-5-TP16-DP1-eager"
|
||||
model: "Eco-Tech/GLM-5-w4a8"
|
||||
envs:
|
||||
<<: *envs
|
||||
server_cmd: *server_cmd
|
||||
benchmarks:
|
||||
<<: *benchmarks
|
||||
|
||||
Reference in New Issue
Block a user