From fb283b5820effe930d7f60952aca48177d710e94 Mon Sep 17 00:00:00 2001
From: liuhy1213-cell
Date: Mon, 23 Mar 2026 19:14:19 +0800
Subject: [PATCH] [CI] Add nightly CI test cases for GLM-5 (#7429)

### What this PR does / why we need it?
- Add nightly CI test cases for GLM-5.
- Add the GLM-5 model to the nightly model-download list.

CI run: https://github.com/vllm-project/vllm-ascend/actions/runs/23286178651/job/67710409642#logs

- vLLM version: v0.17.0
- vLLM main: https://github.com/vllm-project/vllm/commit/b31e9326a7d9394aab8c767f8ebe225c65594b60
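This PR also adds a `special_dependencies` field to the single-node test-case config. Every `package: version` pair listed there is pip-installed at the start of `test_single_node`, before the server is launched, so a test case can pin a package (here `transformers` 5.2.0 for GLM-5) until the default version in CI catches up. A minimal sketch of the wiring, mirroring the GLM-5 config added in this patch:

```yaml
# Sketch mirroring tests/e2e/nightly/single_node/models/configs/GLM-5.yaml:
# each entry below is installed as `pip install <package>==<version>`.
_special_dependencies: &special_dependencies
  transformers: "5.2.0"

test_cases:
  - name: "GLM-5-TP16-DP1-decodegraph"
    model: "Eco-Tech/GLM-5-w4a8"
    special_dependencies: *special_dependencies
```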
---------

Signed-off-by: liuhaiyang27
Signed-off-by: liuhy1213-cell
Co-authored-by: liuhaiyang27
---
 .github/workflows/misc/model_list.json          |  1 +
 .github/workflows/schedule_nightly_test_a3.yaml |  3 +++
 .../single_node/models/configs/GLM-5.yaml       | 16 ++++++----------
 .../models/scripts/single_node_config.py        |  5 +++++
 .../models/scripts/test_single_node.py          | 11 +++++++++++
 5 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/misc/model_list.json b/.github/workflows/misc/model_list.json
index 3f15bf2b..b02fec2f 100644
--- a/.github/workflows/misc/model_list.json
+++ b/.github/workflows/misc/model_list.json
@@ -241,6 +241,7 @@
     "wemaster/deepseek_mtp_main_random_bf16",
     "wemaster/deepseek_mtp_main_random_w8a8_part",
     "xlangai/OpenCUA-7B",
+    "Eco-Tech/GLM-5-w4a8",
     "Eco-Tech/GLM-4.7-W8A8-floatmtp",
     "MiniMax/MiniMax-M2.5"
 ]
diff --git a/.github/workflows/schedule_nightly_test_a3.yaml b/.github/workflows/schedule_nightly_test_a3.yaml
index 445bacd4..885c0854 100644
--- a/.github/workflows/schedule_nightly_test_a3.yaml
+++ b/.github/workflows/schedule_nightly_test_a3.yaml
@@ -258,6 +258,9 @@ jobs:
           - name: deepseek-v3-2-w8a8
             os: linux-aarch64-a3-16
             config_file_path: DeepSeek-V3.2-W8A8.yaml
+          - name: glm-5-w4a8
+            os: linux-aarch64-a3-16
+            config_file_path: GLM-5.yaml
           - name: glm-4.7-w8a8
             os: linux-aarch64-a3-16
             config_file_path: GLM-4.7.yaml
diff --git a/tests/e2e/nightly/single_node/models/configs/GLM-5.yaml b/tests/e2e/nightly/single_node/models/configs/GLM-5.yaml
index 8be988cb..7dfa9f26 100644
--- a/tests/e2e/nightly/single_node/models/configs/GLM-5.yaml
+++ b/tests/e2e/nightly/single_node/models/configs/GLM-5.yaml
@@ -3,13 +3,12 @@
 # ==========================================

 _envs: &envs
-  HCCL_BUFFSIZE: "200"
+  HCCL_BUFFSIZE: "1024"
   SERVER_PORT: "DEFAULT_PORT"
   HCCL_OP_EXPANSION_MODE: "AIV"
   OMP_PROC_BIND: "false"
   OMP_NUM_THREADS: "1"
   PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
-  VLLM_ASCEND_BALANCE_SCHEDULING: "1"

 _server_cmd: &server_cmd
   - "--enable-expert-parallel"
@@ -36,6 +35,9 @@ _server_cmd: &server_cmd
   - "--speculative-config"
   - '{"num_speculative_tokens": 3, "method": "deepseek_mtp"}'

+_special_dependencies: &special_dependencies
+  transformers: "5.2.0"
+
 _benchmarks: &benchmarks
   acc:
     case_type: accuracy
@@ -65,19 +67,13 @@ _benchmarks: &benchmarks

 test_cases:
   - name: "GLM-5-TP16-DP1-decodegraph"
     model: "Eco-Tech/GLM-5-w4a8"
+    special_dependencies: *special_dependencies
     envs:
       <<: *envs
     server_cmd: *server_cmd
     server_cmd_extra:
       - "--compilation-config"
-      - '{"cudagraph_capture": [4,8,12,16,20,24,28,32], "cudagraph_model":"FULL_DECODE_ONLY"}'
+      - '{"cudagraph_capture_sizes": [4,8,16,32,64,128,256,512], "cudagraph_mode": "FULL_DECODE_ONLY"}'
     benchmarks:
       <<: *benchmarks
-  - name: "GLM-5-TP16-DP1-eager"
-    model: "Eco-Tech/GLM-5-w4a8"
-    envs:
-      <<: *envs
-    server_cmd: *server_cmd
-    benchmarks:
-      <<: *benchmarks
diff --git a/tests/e2e/nightly/single_node/models/scripts/single_node_config.py b/tests/e2e/nightly/single_node/models/scripts/single_node_config.py
index 86c317c5..599987eb 100644
--- a/tests/e2e/nightly/single_node/models/scripts/single_node_config.py
+++ b/tests/e2e/nightly/single_node/models/scripts/single_node_config.py
@@ -26,6 +26,7 @@ class SingleNodeConfig:
     name: str
     model: str
     envs: dict[str, Any] = field(default_factory=dict)
+    special_dependencies: dict[str, Any] = field(default_factory=dict)
     prompts: list[str] = field(default_factory=lambda: PROMPTS)
     api_keyword_args: dict[str, Any] = field(default_factory=lambda: API_KEYWORD_ARGS)
     benchmarks: dict[str, Any] = field(default_factory=dict)
@@ -48,6 +49,8 @@ class SingleNodeConfig:
             self.api_keyword_args = API_KEYWORD_ARGS
         if self.benchmarks is None:
             self.benchmarks = {}
+        if self.special_dependencies is None:
+            self.special_dependencies = {}
         if self.test_content is None:
             self.test_content = []

@@ -100,6 +103,7 @@ class SingleNodeConfigLoader:
             "name",
             "model",
             "envs",
+            "special_dependencies",
             "prompts",
             "api_keyword_args",
             "benchmarks",
@@ -169,6 +173,7 @@ class SingleNodeConfigLoader:
             name=case["name"],
             model=case["model"],
             envs=case.get("envs", {}),
+            special_dependencies=case.get("special_dependencies", {}),
             server_cmd=full_cmd,
             epd_server_cmds=case.get("epd_server_cmds", []),
             epd_proxy_args=case.get("epd_proxy_args", []),
diff --git a/tests/e2e/nightly/single_node/models/scripts/test_single_node.py b/tests/e2e/nightly/single_node/models/scripts/test_single_node.py
index a2f3822b..3b278e46 100644
--- a/tests/e2e/nightly/single_node/models/scripts/test_single_node.py
+++ b/tests/e2e/nightly/single_node/models/scripts/test_single_node.py
@@ -3,6 +3,8 @@ from typing import Any

 import openai
 import pytest
+import subprocess
+import sys

 from tests.e2e.conftest import DisaggEpdProxy, RemoteEPDServer, RemoteOpenAIServer
 from tests.e2e.nightly.single_node.models.scripts.single_node_config import (
@@ -144,6 +146,15 @@ def _run_benchmarks(config: SingleNodeConfig, port: int) -> None:
 @pytest.mark.asyncio
 @pytest.mark.parametrize("config", configs, ids=[config.name for config in configs])
 async def test_single_node(config: SingleNodeConfig) -> None:
+    # TODO: remove this once the transformers version is upgraded
+    if config.special_dependencies:
+        for k, v in config.special_dependencies.items():
+            command = [
+                sys.executable,
+                "-m", "pip", "install",
+                f"{k}=={v}",
+            ]
+            subprocess.call(command)
     if config.service_mode == "epd":
         with (
             RemoteEPDServer(vllm_serve_args=config.epd_server_cmds, env_dict=config.envs) as _,