From 87d6424b2ee301469381d99d41936a37bc0eec91 Mon Sep 17 00:00:00 2001
From: aipaes <82140963+aipaes@users.noreply.github.com>
Date: Thu, 19 Mar 2026 16:43:29 +0800
Subject: [PATCH] [CI] Add nightly CI test cases for the GLM-4.7 model. (#7391)

### What this PR does / why we need it?
Add accuracy (acc) nightly CI test cases for the GLM-4.7 model.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Through CI.

- vLLM version: v0.17.0
- vLLM main: https://github.com/vllm-project/vllm/commit/4034c3d32e30d01639459edd3ab486f56993876d

---------

Signed-off-by: zjks98
Co-authored-by: zjks98
---
 .github/workflows/schedule_nightly_test_a3.yaml |  3 +
 .../single_node/models/configs/GLM-4.7.yaml     | 67 +++++++++++++++++++
 2 files changed, 70 insertions(+)
 create mode 100644 tests/e2e/nightly/single_node/models/configs/GLM-4.7.yaml

diff --git a/.github/workflows/schedule_nightly_test_a3.yaml b/.github/workflows/schedule_nightly_test_a3.yaml
index 5d9c2b7c..445bacd4 100644
--- a/.github/workflows/schedule_nightly_test_a3.yaml
+++ b/.github/workflows/schedule_nightly_test_a3.yaml
@@ -258,6 +258,9 @@ jobs:
         - name: deepseek-v3-2-w8a8
           os: linux-aarch64-a3-16
           config_file_path: DeepSeek-V3.2-W8A8.yaml
+        - name: glm-4.7-w8a8
+          os: linux-aarch64-a3-16
+          config_file_path: GLM-4.7.yaml
         - name: kimi-k2-thinking
           os: linux-aarch64-a3-16
           config_file_path: Kimi-K2-Thinking.yaml
diff --git a/tests/e2e/nightly/single_node/models/configs/GLM-4.7.yaml b/tests/e2e/nightly/single_node/models/configs/GLM-4.7.yaml
new file mode 100644
index 00000000..a99c45ad
--- /dev/null
+++ b/tests/e2e/nightly/single_node/models/configs/GLM-4.7.yaml
@@ -0,0 +1,67 @@
+# ==========================================
+# Shared Configurations
+# ==========================================
+
+_envs: &envs
+  HCCL_BUFFSIZE: "512"
+  SERVER_PORT: "DEFAULT_PORT"
+  OMP_PROC_BIND: "false"
+  OMP_NUM_THREADS: "1"
+  PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
+  HCCL_OP_EXPANSION_MODE: "AIV"
+  VLLM_ASCEND_BALANCE_SCHEDULING: "1"
+  VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE: "1"
+  VLLM_ASCEND_ENABLE_FLASHCOMM1: "1"
+  VLLM_ASCEND_ENABLE_FUSED_MC2: "1"
+
+_server_cmd: &server_cmd
+  - "--enable-expert-parallel"
+  - "--tensor-parallel-size"
+  - "8"
+  - "--data-parallel-size"
+  - "2"
+  - "--port"
+  - "$SERVER_PORT"
+  - "--max-model-len"
+  - "8192"
+  - "--max-num-batched-tokens"
+  - "8192"
+  - "--max-num-seqs"
+  - "16"
+  - "--async-scheduling"
+  - "--quantization"
+  - "ascend"
+  - "--trust-remote-code"
+  - "--gpu-memory-utilization"
+  - "0.9"
+  - "--speculative-config"
+  - '{"num_speculative_tokens": 3, "method": "mtp"}'
+  - "--additional-config"
+  - '{"enable_shared_expert_dp": true, "ascend_fusion_config": {"fusion_ops_gmmswigluquant": false}}'
+
+_benchmarks: &benchmarks
+  acc:
+    case_type: accuracy
+    dataset_path: vllm-ascend/gsm8k-lite
+    request_conf: vllm_api_general_chat
+    dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_chat_prompt
+    max_out_len: 4096
+    batch_size: 8
+    baseline: 95
+    threshold: 5
+
+# ==========================================
+# ACTUAL TEST CASES
+# ==========================================
+
+test_cases:
+  - name: "GLM-4.7-TP8-DP2-decodegraph"
+    model: "Eco-Tech/GLM-4.7-W8A8-floatmtp"
+    envs:
+      <<: *envs
+    server_cmd: *server_cmd
+    server_cmd_extra:
+      - "--compilation-config"
+      - '{"cudagraph_capture_sizes": [1,2,4,8,16,32,64,128,256,512], "cudagraph_mode": "FULL_DECODE_ONLY"}'
+    benchmarks:
+      <<: *benchmarks
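
For local reproduction, the `_server_cmd` anchor plus the per-case `server_cmd_extra` list are simply the argument list for a `vllm serve` launch of `Eco-Tech/GLM-4.7-W8A8-floatmtp`. A minimal sketch of how a runner could assemble that command, assuming it concatenates the two lists and substitutes `$`-prefixed placeholders such as `$SERVER_PORT` from `_envs` (the helper name and the substitution rule are assumptions here, not the vllm-ascend test runner's actual API):

```python
import shlex

# Sketch only: build_serve_command is a hypothetical helper, not the real
# nightly-harness API. It joins the shared server_cmd anchor with the
# per-case server_cmd_extra list and resolves "$NAME" placeholders from
# the _envs mapping before quoting everything for the shell.
def build_serve_command(model: str, server_cmd: list[str],
                        server_cmd_extra: list[str],
                        envs: dict[str, str]) -> str:
    args = []
    for arg in server_cmd + server_cmd_extra:
        if arg.startswith("$"):  # e.g. "$SERVER_PORT" -> envs["SERVER_PORT"]
            arg = envs.get(arg[1:], arg)
        args.append(arg)
    return ("vllm serve " + shlex.quote(model) + " "
            + " ".join(shlex.quote(a) for a in args))
```

With the config above, the resulting command describes a TP8 x DP2 expert-parallel deployment across the 16-NPU `linux-aarch64-a3-16` runner, with MTP speculative decoding (3 speculative tokens) and full-decode-only graph capture.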
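The `baseline: 95` / `threshold: 5` pair in `_benchmarks` reads as a tolerance band on the gsm8k-lite accuracy score. A minimal sketch of that gate, assuming the runner fails the case once measured accuracy falls more than `threshold` points below `baseline` (this interpretation of the two fields is an assumption; check the runner's source for the exact semantics):

```python
def accuracy_gate(measured: float, baseline: float = 95.0,
                  threshold: float = 5.0) -> bool:
    # Assumed semantics: the case passes while the measured gsm8k-lite
    # accuracy stays within `threshold` points of `baseline`, i.e.
    # measured >= 90.0 for the GLM-4.7 config above.
    return measured >= baseline - threshold
```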