[CI] Add nightly CI test cases for the GLM-5 (#7429)
### What this PR does / why we need it?
Add nightly CI test cases for GLM-5.
Add the GLM-5 model (`Eco-Tech/GLM-5-w4a8`) to the model download list.
https://github.com/vllm-project/vllm-ascend/actions/runs/23286178651/job/67710409642#logs
- vLLM version: v0.17.0
- vLLM main: b31e9326a7
---------
Signed-off-by: liuhaiyang27 <liuhaiyang27@huawei.com>
Signed-off-by: liuhy1213-cell <liuhy1213@gmail.com>
Co-authored-by: liuhaiyang27 <liuhaiyang27@huawei.com>
This commit is contained in:
1
.github/workflows/misc/model_list.json
vendored
1
.github/workflows/misc/model_list.json
vendored
@@ -241,6 +241,7 @@
|
|||||||
"wemaster/deepseek_mtp_main_random_bf16",
|
"wemaster/deepseek_mtp_main_random_bf16",
|
||||||
"wemaster/deepseek_mtp_main_random_w8a8_part",
|
"wemaster/deepseek_mtp_main_random_w8a8_part",
|
||||||
"xlangai/OpenCUA-7B",
|
"xlangai/OpenCUA-7B",
|
||||||
|
"Eco-Tech/GLM-5-w4a8",
|
||||||
"Eco-Tech/GLM-4.7-W8A8-floatmtp",
|
"Eco-Tech/GLM-4.7-W8A8-floatmtp",
|
||||||
"MiniMax/MiniMax-M2.5"
|
"MiniMax/MiniMax-M2.5"
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -258,6 +258,9 @@ jobs:
|
|||||||
- name: deepseek-v3-2-w8a8
|
- name: deepseek-v3-2-w8a8
|
||||||
os: linux-aarch64-a3-16
|
os: linux-aarch64-a3-16
|
||||||
config_file_path: DeepSeek-V3.2-W8A8.yaml
|
config_file_path: DeepSeek-V3.2-W8A8.yaml
|
||||||
|
- name: glm-5-w4a8
|
||||||
|
os: linux-aarch64-a3-16
|
||||||
|
config_file_path: GLM-5.yaml
|
||||||
- name: glm-4.7-w8a8
|
- name: glm-4.7-w8a8
|
||||||
os: linux-aarch64-a3-16
|
os: linux-aarch64-a3-16
|
||||||
config_file_path: GLM-4.7.yaml
|
config_file_path: GLM-4.7.yaml
|
||||||
|
|||||||
@@ -3,13 +3,12 @@
|
|||||||
# ==========================================
|
# ==========================================
|
||||||
|
|
||||||
_envs: &envs
|
_envs: &envs
|
||||||
HCCL_BUFFSIZE: "200"
|
HCCL_BUFFSIZE: "1024"
|
||||||
SERVER_PORT: "DEFAULT_PORT"
|
SERVER_PORT: "DEFAULT_PORT"
|
||||||
HCCL_OP_EXPANSION_MODE: "AIV"
|
HCCL_OP_EXPANSION_MODE: "AIV"
|
||||||
OMP_PROC_BIND: "false"
|
OMP_PROC_BIND: "false"
|
||||||
OMP_NUM_THREADS: "1"
|
OMP_NUM_THREADS: "1"
|
||||||
PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
|
PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
|
||||||
VLLM_ASCEND_BALANCE_SCHEDULING: "1"
|
|
||||||
|
|
||||||
_server_cmd: &server_cmd
|
_server_cmd: &server_cmd
|
||||||
- "--enable-expert-parallel"
|
- "--enable-expert-parallel"
|
||||||
@@ -36,6 +35,9 @@ _server_cmd: &server_cmd
|
|||||||
- "--speculative-config"
|
- "--speculative-config"
|
||||||
- '{"num_speculative_tokens": 3, "method": "deepseek_mtp"}'
|
- '{"num_speculative_tokens": 3, "method": "deepseek_mtp"}'
|
||||||
|
|
||||||
|
_special_dependencies: &special_dependencies
|
||||||
|
transformers: "5.2.0"
|
||||||
|
|
||||||
_benchmarks: &benchmarks
|
_benchmarks: &benchmarks
|
||||||
acc:
|
acc:
|
||||||
case_type: accuracy
|
case_type: accuracy
|
||||||
@@ -65,19 +67,13 @@ _benchmarks: &benchmarks
|
|||||||
test_cases:
|
test_cases:
|
||||||
- name: "GLM-5-TP16-DP1-decodegraph"
|
- name: "GLM-5-TP16-DP1-decodegraph"
|
||||||
model: "Eco-Tech/GLM-5-w4a8"
|
model: "Eco-Tech/GLM-5-w4a8"
|
||||||
|
special_dependencies: *special_dependencies
|
||||||
envs:
|
envs:
|
||||||
<<: *envs
|
<<: *envs
|
||||||
server_cmd: *server_cmd
|
server_cmd: *server_cmd
|
||||||
server_cmd_extra:
|
server_cmd_extra:
|
||||||
- "--compilation-config"
|
- "--compilation-config"
|
||||||
- '{"cudagraph_capture": [4,8,12,16,20,24,28,32], "cudagraph_model":"FULL_DECODE_ONLY"}'
|
- '{"cudagraph_capture_sizes": [4,8,16,32,64,128,256,512], "cudagraph_mode": "FULL_DECODE_ONLY"}'
|
||||||
benchmarks:
|
benchmarks:
|
||||||
<<: *benchmarks
|
<<: *benchmarks
|
||||||
|
|
||||||
- name: "GLM-5-TP16-DP1-eager"
|
|
||||||
model: "Eco-Tech/GLM-5-w4a8"
|
|
||||||
envs:
|
|
||||||
<<: *envs
|
|
||||||
server_cmd: *server_cmd
|
|
||||||
benchmarks:
|
|
||||||
<<: *benchmarks
|
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ class SingleNodeConfig:
|
|||||||
name: str
|
name: str
|
||||||
model: str
|
model: str
|
||||||
envs: dict[str, Any] = field(default_factory=dict)
|
envs: dict[str, Any] = field(default_factory=dict)
|
||||||
|
special_dependencies: dict[str, Any] = field(default_factory=dict)
|
||||||
prompts: list[str] = field(default_factory=lambda: PROMPTS)
|
prompts: list[str] = field(default_factory=lambda: PROMPTS)
|
||||||
api_keyword_args: dict[str, Any] = field(default_factory=lambda: API_KEYWORD_ARGS)
|
api_keyword_args: dict[str, Any] = field(default_factory=lambda: API_KEYWORD_ARGS)
|
||||||
benchmarks: dict[str, Any] = field(default_factory=dict)
|
benchmarks: dict[str, Any] = field(default_factory=dict)
|
||||||
@@ -48,6 +49,8 @@ class SingleNodeConfig:
|
|||||||
self.api_keyword_args = API_KEYWORD_ARGS
|
self.api_keyword_args = API_KEYWORD_ARGS
|
||||||
if self.benchmarks is None:
|
if self.benchmarks is None:
|
||||||
self.benchmarks = {}
|
self.benchmarks = {}
|
||||||
|
if self.special_dependencies is None:
|
||||||
|
self.special_dependencies = {}
|
||||||
if self.test_content is None:
|
if self.test_content is None:
|
||||||
self.test_content = []
|
self.test_content = []
|
||||||
|
|
||||||
@@ -100,6 +103,7 @@ class SingleNodeConfigLoader:
|
|||||||
"name",
|
"name",
|
||||||
"model",
|
"model",
|
||||||
"envs",
|
"envs",
|
||||||
|
"special_dependencies",
|
||||||
"prompts",
|
"prompts",
|
||||||
"api_keyword_args",
|
"api_keyword_args",
|
||||||
"benchmarks",
|
"benchmarks",
|
||||||
@@ -169,6 +173,7 @@ class SingleNodeConfigLoader:
|
|||||||
name=case["name"],
|
name=case["name"],
|
||||||
model=case["model"],
|
model=case["model"],
|
||||||
envs=case.get("envs", {}),
|
envs=case.get("envs", {}),
|
||||||
|
special_dependencies=case.get("special_dependencies", {}),
|
||||||
server_cmd=full_cmd,
|
server_cmd=full_cmd,
|
||||||
epd_server_cmds=case.get("epd_server_cmds", []),
|
epd_server_cmds=case.get("epd_server_cmds", []),
|
||||||
epd_proxy_args=case.get("epd_proxy_args", []),
|
epd_proxy_args=case.get("epd_proxy_args", []),
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ from typing import Any
|
|||||||
|
|
||||||
import openai
|
import openai
|
||||||
import pytest
|
import pytest
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
from tests.e2e.conftest import DisaggEpdProxy, RemoteEPDServer, RemoteOpenAIServer
|
from tests.e2e.conftest import DisaggEpdProxy, RemoteEPDServer, RemoteOpenAIServer
|
||||||
from tests.e2e.nightly.single_node.models.scripts.single_node_config import (
|
from tests.e2e.nightly.single_node.models.scripts.single_node_config import (
|
||||||
@@ -144,6 +146,15 @@ def _run_benchmarks(config: SingleNodeConfig, port: int) -> None:
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.parametrize("config", configs, ids=[config.name for config in configs])
|
@pytest.mark.parametrize("config", configs, ids=[config.name for config in configs])
|
||||||
async def test_single_node(config: SingleNodeConfig) -> None:
|
async def test_single_node(config: SingleNodeConfig) -> None:
|
||||||
|
# TODO: remove this part after the transformers version is upgraded
|
||||||
|
if config.special_dependencies:
|
||||||
|
for k, v in config.special_dependencies.items():
|
||||||
|
command = [
|
||||||
|
sys.executable,
|
||||||
|
"-m", "pip", "install",
|
||||||
|
f"{k}=={v}",
|
||||||
|
]
|
||||||
|
subprocess.call(command)
|
||||||
if config.service_mode == "epd":
|
if config.service_mode == "epd":
|
||||||
with (
|
with (
|
||||||
RemoteEPDServer(vllm_serve_args=config.epd_server_cmds, env_dict=config.envs) as _,
|
RemoteEPDServer(vllm_serve_args=config.epd_server_cmds, env_dict=config.envs) as _,
|
||||||
|
|||||||
Reference in New Issue
Block a user