[CI] Add nightly CI test cases for the GLM-5 (#7429)

### What this PR does / why we need it? Add nightly CI test cases for GLM-5. Add model download for GLM-5. CI run: https://github.com/vllm-project/vllm-ascend/actions/runs/23286178651/job/67710409642#logs - vLLM version: v0.17.0 - vLLM main: b31e9326a7 --------- Signed-off-by: liuhaiyang27 <liuhaiyang27@huawei.com> Signed-off-by: liuhy1213-cell <liuhy1213@gmail.com> Co-authored-by: liuhaiyang27 <liuhaiyang27@huawei.com>
2026-03-23 19:14:19 +08:00
parent 41dadd4312
commit fb283b5820
5 changed files with 26 additions and 10 deletions
--- a/.github/workflows/misc/model_list.json
+++ b/.github/workflows/misc/model_list.json
@@ -241,6 +241,7 @@
      "wemaster/deepseek_mtp_main_random_bf16",
      "wemaster/deepseek_mtp_main_random_w8a8_part",
      "xlangai/OpenCUA-7B",
      "Eco-Tech/GLM-5-w4a8",
      "Eco-Tech/GLM-4.7-W8A8-floatmtp",
      "MiniMax/MiniMax-M2.5"
    ]
--- a/.github/workflows/schedule_nightly_test_a3.yaml
+++ b/.github/workflows/schedule_nightly_test_a3.yaml
@@ -258,6 +258,9 @@ jobs:
          - name: deepseek-v3-2-w8a8
            os: linux-aarch64-a3-16
            config_file_path: DeepSeek-V3.2-W8A8.yaml
          - name: glm-5-w4a8
            os: linux-aarch64-a3-16
            config_file_path: GLM-5.yaml
          - name: glm-4.7-w8a8
            os: linux-aarch64-a3-16
            config_file_path: GLM-4.7.yaml
--- a/tests/e2e/nightly/single_node/models/configs/GLM-5.yaml
+++ b/tests/e2e/nightly/single_node/models/configs/GLM-5.yaml
@@ -3,13 +3,12 @@
 # ==========================================
 _envs: &envs
-  HCCL_BUFFSIZE: "200"
+  HCCL_BUFFSIZE: "1024"
  SERVER_PORT: "DEFAULT_PORT"
  HCCL_OP_EXPANSION_MODE: "AIV"
  OMP_PROC_BIND: "false"
  OMP_NUM_THREADS: "1"
  PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
  VLLM_ASCEND_BALANCE_SCHEDULING: "1"
 _server_cmd: &server_cmd
  - "--enable-expert-parallel"
@@ -36,6 +35,9 @@ _server_cmd: &server_cmd
  - "--speculative-config"
  - '{"num_speculative_tokens": 3, "method": "deepseek_mtp"}'
 _special_dependencies: &special_dependencies
  transformers: "5.2.0"
 _benchmarks: &benchmarks
  acc:
    case_type: accuracy
@@ -65,19 +67,13 @@ _benchmarks: &benchmarks
 test_cases:
  - name: "GLM-5-TP16-DP1-decodegraph"
    model: "Eco-Tech/GLM-5-w4a8"
    special_dependencies: *special_dependencies
    envs:
      <<: *envs
    server_cmd: *server_cmd
    server_cmd_extra:
      - "--compilation-config"
-      - '{"cudagraph_capture": [4,8,12,16,20,24,28,32], "cudagraph_model":"FULL_DECODE_ONLY"}'
+      - '{"cudagraph_capture_sizes": [4,8,16,32,64,128,256,512], "cudagraph_mode": "FULL_DECODE_ONLY"}'
    benchmarks:
      <<: *benchmarks
  - name: "GLM-5-TP16-DP1-eager"
    model: "Eco-Tech/GLM-5-w4a8"
    envs:
      <<: *envs
    server_cmd: *server_cmd
    benchmarks:
      <<: *benchmarks
--- a/tests/e2e/nightly/single_node/models/scripts/single_node_config.py
+++ b/tests/e2e/nightly/single_node/models/scripts/single_node_config.py
@@ -26,6 +26,7 @@ class SingleNodeConfig:
    name: str
    model: str
    envs: dict[str, Any] = field(default_factory=dict)
    special_dependencies: dict[str, Any] = field(default_factory=dict)
    prompts: list[str] = field(default_factory=lambda: PROMPTS)
    api_keyword_args: dict[str, Any] = field(default_factory=lambda: API_KEYWORD_ARGS)
    benchmarks: dict[str, Any] = field(default_factory=dict)
@@ -48,6 +49,8 @@ class SingleNodeConfig:
            self.api_keyword_args = API_KEYWORD_ARGS
        if self.benchmarks is None:
            self.benchmarks = {}
        if self.special_dependencies is None:
            self.special_dependencies = {}
        if self.test_content is None:
            self.test_content = []
@@ -100,6 +103,7 @@ class SingleNodeConfigLoader:
        "name",
        "model",
        "envs",
        "special_dependencies",
        "prompts",
        "api_keyword_args",
        "benchmarks",
@@ -169,6 +173,7 @@ class SingleNodeConfigLoader:
                    name=case["name"],
                    model=case["model"],
                    envs=case.get("envs", {}),
                    special_dependencies=case.get("special_dependencies", {}),
                    server_cmd=full_cmd,
                    epd_server_cmds=case.get("epd_server_cmds", []),
                    epd_proxy_args=case.get("epd_proxy_args", []),
--- a/tests/e2e/nightly/single_node/models/scripts/test_single_node.py
+++ b/tests/e2e/nightly/single_node/models/scripts/test_single_node.py
@@ -3,6 +3,8 @@ from typing import Any
 import openai
 import pytest
 import subprocess
 import sys
 from tests.e2e.conftest import DisaggEpdProxy, RemoteEPDServer, RemoteOpenAIServer
 from tests.e2e.nightly.single_node.models.scripts.single_node_config import (
@@ -144,6 +146,15 @@ def _run_benchmarks(config: SingleNodeConfig, port: int) -> None:
@pytest.mark.asyncio
@pytest.mark.parametrize("config", configs, ids=[config.name for config in configs])
 async def test_single_node(config: SingleNodeConfig) -> None:
    # TODO: remove this part once the transformers version has been upgraded
    if config.special_dependencies:
        for k, v in config.special_dependencies.items():
            command = [
                sys.executable,
                "-m", "pip", "install",
                f"{k}=={v}",
            ]
            subprocess.call(command)
    if config.service_mode == "epd":
        with (
            RemoteEPDServer(vllm_serve_args=config.epd_server_cmds, env_dict=config.envs) as _,