From fb283b5820effe930d7f60952aca48177d710e94 Mon Sep 17 00:00:00 2001
From: liuhy1213-cell
Date: Mon, 23 Mar 2026 19:14:19 +0800
Subject: [PATCH] [CI] Add nightly CI test cases for GLM-5 (#7429)

### What this PR does / why we need it?
- Add nightly CI test cases for GLM-5.
- Add the GLM-5 model to the nightly model-download list.

CI run: https://github.com/vllm-project/vllm-ascend/actions/runs/23286178651/job/67710409642#logs

- vLLM version: v0.17.0
- vLLM main: https://github.com/vllm-project/vllm/commit/b31e9326a7d9394aab8c767f8ebe225c65594b60
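This PR also adds a `special_dependencies` field to the single-node test-case config. Every `package: version` pair listed there is pip-installed at the start of `test_single_node`, before the server is launched, so a test case can pin a package (here `transformers` 5.2.0 for GLM-5) until the default version in CI catches up. A minimal sketch of the wiring, mirroring the GLM-5 config added in this patch:

```yaml
# Sketch mirroring tests/e2e/nightly/single_node/models/configs/GLM-5.yaml:
# each entry below is installed as `pip install <package>==<version>`.
_special_dependencies: &special_dependencies
  transformers: "5.2.0"

test_cases:
  - name: "GLM-5-TP16-DP1-decodegraph"
    model: "Eco-Tech/GLM-5-w4a8"
    special_dependencies: *special_dependencies
```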
---------

Signed-off-by: liuhaiyang27
Signed-off-by: liuhy1213-cell
Co-authored-by: liuhaiyang27
---
 .github/workflows/misc/model_list.json          |  1 +
 .github/workflows/schedule_nightly_test_a3.yaml |  3 +++
 .../single_node/models/configs/GLM-5.yaml       | 16 ++++++----------
 .../models/scripts/single_node_config.py        |  5 +++++
 .../models/scripts/test_single_node.py          | 11 +++++++++++
 5 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/misc/model_list.json b/.github/workflows/misc/model_list.json
index 3f15bf2b..b02fec2f 100644
--- a/.github/workflows/misc/model_list.json
+++ b/.github/workflows/misc/model_list.json
@@ -241,6 +241,7 @@
     "wemaster/deepseek_mtp_main_random_bf16",
     "wemaster/deepseek_mtp_main_random_w8a8_part",
     "xlangai/OpenCUA-7B",
+    "Eco-Tech/GLM-5-w4a8",
     "Eco-Tech/GLM-4.7-W8A8-floatmtp",
     "MiniMax/MiniMax-M2.5"
 ]
diff --git a/.github/workflows/schedule_nightly_test_a3.yaml b/.github/workflows/schedule_nightly_test_a3.yaml
index 445bacd4..885c0854 100644
--- a/.github/workflows/schedule_nightly_test_a3.yaml
+++ b/.github/workflows/schedule_nightly_test_a3.yaml
@@ -258,6 +258,9 @@ jobs:
           - name: deepseek-v3-2-w8a8
             os: linux-aarch64-a3-16
             config_file_path: DeepSeek-V3.2-W8A8.yaml
+          - name: glm-5-w4a8
+            os: linux-aarch64-a3-16
+            config_file_path: GLM-5.yaml
           - name: glm-4.7-w8a8
             os: linux-aarch64-a3-16
             config_file_path: GLM-4.7.yaml
diff --git a/tests/e2e/nightly/single_node/models/configs/GLM-5.yaml b/tests/e2e/nightly/single_node/models/configs/GLM-5.yaml
index 8be988cb..7dfa9f26 100644
--- a/tests/e2e/nightly/single_node/models/configs/GLM-5.yaml
+++ b/tests/e2e/nightly/single_node/models/configs/GLM-5.yaml
@@ -3,13 +3,12 @@
 # ==========================================

 _envs: &envs
-  HCCL_BUFFSIZE: "200"
+  HCCL_BUFFSIZE: "1024"
   SERVER_PORT: "DEFAULT_PORT"
   HCCL_OP_EXPANSION_MODE: "AIV"
   OMP_PROC_BIND: "false"
   OMP_NUM_THREADS: "1"
   PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
-  VLLM_ASCEND_BALANCE_SCHEDULING: "1"

 _server_cmd: &server_cmd
   - "--enable-expert-parallel"
@@ -36,6 +35,9 @@ _server_cmd: &server_cmd
   - "--speculative-config"
   - '{"num_speculative_tokens": 3, "method": "deepseek_mtp"}'

+_special_dependencies: &special_dependencies
+  transformers: "5.2.0"
+
 _benchmarks: &benchmarks
   acc:
     case_type: accuracy
@@ -65,19 +67,13 @@ _benchmarks: &benchmarks

 test_cases:
   - name: "GLM-5-TP16-DP1-decodegraph"
     model: "Eco-Tech/GLM-5-w4a8"
+    special_dependencies: *special_dependencies
     envs:
       <<: *envs
     server_cmd: *server_cmd
     server_cmd_extra:
       - "--compilation-config"
-      - '{"cudagraph_capture": [4,8,12,16,20,24,28,32], "cudagraph_model":"FULL_DECODE_ONLY"}'
+      - '{"cudagraph_capture_sizes": [4,8,16,32,64,128,256,512], "cudagraph_mode": "FULL_DECODE_ONLY"}'
     benchmarks:
       <<: *benchmarks
-  - name: "GLM-5-TP16-DP1-eager"
-    model: "Eco-Tech/GLM-5-w4a8"
-    envs:
-      <<: *envs
-    server_cmd: *server_cmd
-    benchmarks:
-      <<: *benchmarks
diff --git a/tests/e2e/nightly/single_node/models/scripts/single_node_config.py b/tests/e2e/nightly/single_node/models/scripts/single_node_config.py
index 86c317c5..599987eb 100644
--- a/tests/e2e/nightly/single_node/models/scripts/single_node_config.py
+++ b/tests/e2e/nightly/single_node/models/scripts/single_node_config.py
@@ -26,6 +26,7 @@ class SingleNodeConfig:
     name: str
     model: str
     envs: dict[str, Any] = field(default_factory=dict)
+    special_dependencies: dict[str, Any] = field(default_factory=dict)
     prompts: list[str] = field(default_factory=lambda: PROMPTS)
     api_keyword_args: dict[str, Any] = field(default_factory=lambda: API_KEYWORD_ARGS)
     benchmarks: dict[str, Any] = field(default_factory=dict)
@@ -48,6 +49,8 @@ class SingleNodeConfig:
             self.api_keyword_args = API_KEYWORD_ARGS
         if self.benchmarks is None:
             self.benchmarks = {}
+        if self.special_dependencies is None:
+            self.special_dependencies = {}
         if self.test_content is None:
             self.test_content = []

@@ -100,6 +103,7 @@ class SingleNodeConfigLoader:
             "name",
             "model",
             "envs",
+            "special_dependencies",
             "prompts",
             "api_keyword_args",
             "benchmarks",
@@ -169,6 +173,7 @@ class SingleNodeConfigLoader:
             name=case["name"],
             model=case["model"],
             envs=case.get("envs", {}),
+            special_dependencies=case.get("special_dependencies", {}),
             server_cmd=full_cmd,
             epd_server_cmds=case.get("epd_server_cmds", []),
             epd_proxy_args=case.get("epd_proxy_args", []),
diff --git a/tests/e2e/nightly/single_node/models/scripts/test_single_node.py b/tests/e2e/nightly/single_node/models/scripts/test_single_node.py
index a2f3822b..3b278e46 100644
--- a/tests/e2e/nightly/single_node/models/scripts/test_single_node.py
+++ b/tests/e2e/nightly/single_node/models/scripts/test_single_node.py
@@ -3,6 +3,8 @@ from typing import Any

 import openai
 import pytest
+import subprocess
+import sys

 from tests.e2e.conftest import DisaggEpdProxy, RemoteEPDServer, RemoteOpenAIServer
 from tests.e2e.nightly.single_node.models.scripts.single_node_config import (
@@ -144,6 +146,15 @@ def _run_benchmarks(config: SingleNodeConfig, port: int) -> None:
 @pytest.mark.asyncio
 @pytest.mark.parametrize("config", configs, ids=[config.name for config in configs])
 async def test_single_node(config: SingleNodeConfig) -> None:
+    # TODO: remove this once the transformers version is upgraded
+    if config.special_dependencies:
+        for k, v in config.special_dependencies.items():
+            command = [
+                sys.executable,
+                "-m", "pip", "install",
+                f"{k}=={v}",
+            ]
+            subprocess.call(command)
     if config.service_mode == "epd":
         with (
             RemoteEPDServer(vllm_serve_args=config.epd_server_cmds, env_dict=config.envs) as _,