### What this PR does / why we need it?
Add nightly CI test cases for GLM-5.
Add model download for GLM-5.
https://github.com/vllm-project/vllm-ascend/actions/runs/23286178651/job/67710409642#logs
- vLLM version: v0.17.0
- vLLM main:
b31e9326a7
---------
Signed-off-by: liuhaiyang27 <liuhaiyang27@huawei.com>
Signed-off-by: liuhy1213-cell <liuhy1213@gmail.com>
Co-authored-by: liuhaiyang27 <liuhaiyang27@huawei.com>
249 lines
9.1 KiB
JSON
{
    "models": [
        "AngelSlim/Qwen3-32B_eagle3",
        "AngelSlim/Qwen3-a3B_eagle3",
        "Anionex/Qwen3-1.7B-W4A8-V1",
        "ArthurZ/ilama-3.2-1B",
        "BAAI/bge-base-en-v1.5",
        "BAAI/bge-large-zh-v1.5",
        "BAAI/bge-m3",
        "BAAI/bge-multilingual-gemma2",
        "BAAI/bge-reranker-large",
        "BAAI/bge-reranker-v2-m3",
        "BAAI/bge-small-en-v1.5",
        "BAAI/kernel_meta",
        "ByteDance-Seed/BAGEL-7B-MoT",
        "DeepSeek-ai/DeepSeek-OCR",
        "DevQuasar/deepseek-ai.DeepSeek-V3.2-BF16",
        "Eco-Tech/DeepSeek-V3.1-w8a8-mtp-QuaRot",
        "Eco-Tech/Qwen3-30B-A3B-w8a8",
        "Eco-Tech/Kimi-K2.5-W4A8",
        "Howeee/Qwen2.5-1.5B-apeach",
        "IntervitensInc/pangu-pro-moe-model",
        "IntervitensInc/pangu-pro-moe-modelt",
        "JackFram/llama-160m",
        "JackFram/llama-68m",
        "Kwai-Keye/Keye-VL-8B-Preview",
        "LLM-Research/Llama-3.2-11B-Vision",
        "LLM-Research/Llama-3.2-1B-Instruct",
        "LLM-Research/Llama-3.2-3B-Instruct",
        "LLM-Research/Meta-Llama-3-8B-Instruct",
        "LLM-Research/Meta-Llama-3.1-8B-Instruct",
        "LLM-Research/Molmo-7B-D-0924",
        "LLM-Research/Phi-4-mini-instruct",
        "LLM-Research/gemma-2-9b-it",
        "LLM-Research/gemma-3-4b-it",
        "LLM-Research/kernel_meta",
        "OpenBMB/MiniCPM-2B-dpo-bf16",
        "OpenBMB/MiniCPM-Llama3-V-2_5",
        "OpenBMB/MiniCPM3-4B",
        "OpenBMB/MiniCPM4-0.5B",
        "OpenGVLab/InternVL2-8B",
        "OpenGVLab/InternVL2_5-8B",
        "OpenGVLab/InternVL3-78B",
        "OpenGVLab/InternVL3-8B",
        "OpenGVLab/InternVL3_5-8B",
        "OpenGVLab/InternVL3_5-8B-hf",
        "PaddlePaddle/ERNIE-4.5-21B-A3B-PT",
        "PaddlePaddle/PaddleOCR-VL",
        "QuantTrio/Qwen3-VL-235B-A22B-Instruct-AWQ",
        "Qwen/QwQ-32B",
        "Qwen/QwQ-32B-AWQ",
        "Qwen/Qwen",
        "Qwen/Qwen-Image",
        "Qwen/Qwen1.5-MoE-A2.7B",
        "Qwen/Qwen2-1.5B-Instruct",
        "Qwen/Qwen2-7B",
        "Qwen/Qwen2-7B-Instruct",
        "Qwen/Qwen2-7B-W8A8",
        "Qwen/Qwen2-Audio-7B-Instruct",
        "Qwen/Qwen2-VL-2B-Instruct",
        "Qwen/Qwen2-VL-7B",
        "Qwen/Qwen2-VL-7B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct-AWQ",
        "Qwen/Qwen2.5-1.5B-Instruct",
        "Qwen/Qwen2.5-14B-Instruct",
        "Qwen/Qwen2.5-32B-Instruct",
        "Qwen/Qwen2.5-7B",
        "Qwen/Qwen2.5-7B-Instruct",
        "Qwen/Qwen2.5-7B-Instruct-1M",
        "Qwen/Qwen2.5-7b-Instruct",
        "Qwen/Qwen2.5-Math-PRM-7B",
        "Qwen/Qwen2.5-Omni-3B",
        "Qwen/Qwen2.5-Omni-7B",
        "Qwen/Qwen2.5-VL-32B-Instruct",
        "Qwen/Qwen2.5-VL-3B-Instruct",
        "Qwen/Qwen2.5-VL-7B-Instruct",
        "Qwen/Qwen2.7-7B",
        "Qwen/Qwen3-0.6B",
        "Qwen/Qwen3-0.6B-Base",
        "Qwen/Qwen3-235B-A22B",
        "Qwen/Qwen3-235B-A22B-Instruct-2507",
        "Qwen/Qwen3-30B-A3B",
        "Qwen/Qwen3-30B-A3B-Instruct-2507",
        "Qwen/Qwen3-30B-A3B-W8A8",
        "Qwen/Qwen3-32B",
        "Qwen/Qwen3-32B-AWQ",
        "Qwen/Qwen3-8B",
        "Qwen/Qwen3-8B-A3B",
        "Qwen/Qwen3-8B-Base",
        "Qwen/Qwen3-8B-W8A8",
        "Qwen/Qwen3-8B-w4a8",
        "Qwen/Qwen3-8B-w8a8",
        "Qwen/Qwen3-Base",
        "Qwen/Qwen3-Coder-30B-A3B-Instruct",
        "Qwen/Qwen3-Embedding-0.6B",
        "Qwen/Qwen3-Embedding-8B",
        "Qwen/Qwen3-Next-80B-A3B-Instruct",
        "Qwen/Qwen3-Next-A3B-Instruct",
        "Qwen/Qwen3-Omni-30B-A3B-Instruct",
        "Qwen/Qwen3-Reranker-0.6B",
        "Qwen/Qwen3-VL-235B-A22B-Instruct",
        "Qwen/Qwen3-VL-2B-Instruct",
        "Qwen/Qwen3-VL-30B-A3B-Instruct",
        "Qwen/Qwen3-VL-32B-Instruct",
        "Qwen/Qwen3-VL-8B-Instruct",
        "Qwen/Qwen3.5-27B",
        "Qwen/Qwen3.5-35B-A3B",
        "RedHatAI/Qwen3-32B-speculator.eagle3",
        "RedHatAI/Qwen3-8B-speculator.eagle3",
        "Shanghai_AI_Laboratory/internlm--chat-7b",
        "Shanghai_AI_Laboratory/internlm-7b",
        "Shanghai_AI_Laboratory/internlm-7b-chat",
        "Shanghai_AI_Laboratory/internlm-7bi-chat",
        "Shanghai_AI_Laboratory/internlm-chat-7b",
        "Tencent-Hunyuan/HunyuanOCR",
        "Tengyunw/qwen3_8b_eagle3",
        "Tongyi-MAI/Z-Image-Turbo",
        "baichuan-inc/Baichuan2-7B-Chat",
        "billy800/Qwen3-30B-A3B-Instruct-2507-AWQ",
        "deepseek-ai/DeepSeek-OCR",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
        "deepseek-ai/DeepSeek-V2",
        "deepseek-ai/DeepSeek-V2-Lite",
        "deepseek-ai/DeepSeek-V2-Lite-Chat",
        "deepseek-ai/Deepseek-V2-Lite",
        "dengcao/ms-marco-MiniLM-L6-v2",
        "facebook/opt-125m",
        "google/gemma-2-9b",
        "google/gemma-3n-E2B-it",
        "google/siglip2-base-patch16-224",
        "hmellor/Ilama-3.2-1B",
        "ibm-research/PowerMoE-3b",
        "intfloat/multilingual-e5-small",
        "jason9693/Qwen2.5-1.5B-apeach",
        "jinaai/jina-embeddings-v3",
        "jinaai/jina-embeddings-v4",
        "jinaai/jina-embeddings-v4-vllm-code",
        "jinaai/jina-embeddings-v4-vllm-retrieval",
        "kernel_meta/kernel_meta_temp_2116872659434949099",
        "llava-hf/LLaVA-NeXT-Video-7B-hf",
        "llava-hf/llava-1.5-7b-hf",
        "llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
        "llava-hf/llava-v1.6-mistral-7b-hf",
        "meta-llama/Llama-3.2-1B-Instruct",
        "mistralai/Ministral-3-3B-Instruct-2512-BF16",
        "mistralai/Ministral-3-8B-Instruct-2512-BF16",
        "mistralai/Mistral-7B-Instruct-v0.1",
        "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
        "mlx-community/DeepSeek-V3-3bit-bf16",
        "moonshotai/Kimi-K2-Thinking",
        "moonshotai/Kimi-Linear-48B-A3B-Instruct",
        "neuralmagic/Qwen2.5-3B-quantized.w8a8",
        "MNN/Qwen3-VL-8B-Instruct-Eagle3",
        "nv-community/audio-flamingo-3",
        "nv-community/audio-flamingo-3-hf",
        "nvidia/audio-flamingo-3-hf",
        "openbmb/MiniCPM-2B-sft-bf16",
        "openbmb/MiniCPM-V-2_6",
        "openbmb/MiniCPM-V-4_5",
        "opendatalab/MinerU2.5-2509-1.2B",
        "rhymes-ai/Aria",
        "sentence-transformers/all-MiniLM-L12-v2",
        "tencent/HunyuanOCR",
        "unsloth/DeepSeek-V3.1-BF16",
        "unsloth/Kimi-K2-Thinking-BF16",
        "unsloth/gpt-oss-20b-BF16",
        "vllm-ascend/DeepSeek-R1-0528-W8A8",
        "vllm-ascend/DeepSeek-R1-W8A8",
        "vllm-ascend/DeepSeek-R1-fa3-pruning",
        "vllm-ascend/DeepSeek-R1-w4a8-pruning",
        "vllm-ascend/DeepSeek-V2-Lite",
        "vllm-ascend/DeepSeek-V2-Lite-W8A8",
        "vllm-ascend/DeepSeek-V3-Pruning",
        "vllm-ascend/DeepSeek-V3-W4A8-Pruing",
        "vllm-ascend/DeepSeek-V3-W8A8",
        "vllm-ascend/DeepSeek-V3.1",
        "vllm-ascend/DeepSeek-V3.1-W4A8-puring",
        "vllm-ascend/DeepSeek-V3.1-W8A8",
        "vllm-ascend/DeepSeek-V3.2-W8A8",
        "vllm-ascend/DeepSeek-V3.2-W8A8-Pruning",
        "vllm-ascend/EAGLE-LLaMA3.1-Instruct-8B",
        "vllm-ascend/EAGLE3-LLaMA3.1-Instruct-8B",
        "vllm-ascend/Kimi-K2-Instruct-W8A8",
        "vllm-ascend/Kimi-K2-Thinking-Pruning",
        "vllm-ascend/Llama-2-7b-hf",
        "vllm-ascend/Llama-3.2-3B-Instruct",
        "vllm-ascend/Meta-Llama-3-8B-Instruct",
        "vllm-ascend/QwQ-32B-W8A8",
        "vllm-ascend/QwQ-32B-w8a8",
        "vllm-ascend/Qwen2-7B-W8A8",
        "vllm-ascend/Qwen2-VL-7B-W8A8",
        "vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8",
        "vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8-new",
        "vllm-ascend/Qwen2.5-0.5B-Instruct-fa3",
        "vllm-ascend/Qwen2.5-0.5B-Instruct-w8a8",
        "vllm-ascend/Qwen2.5-Omni-7B",
        "vllm-ascend/Qwen3-0.6B",
        "vllm-ascend/Qwen3-0.6B-Instruct-W8A8",
        "vllm-ascend/Qwen3-0.6B-W8A16",
        "vllm-ascend/Qwen3-0.6B-W8A8",
        "vllm-ascend/Qwen3-1.7B-W4A8-V1",
        "vllm-ascend/Qwen3-235B-A22B",
        "vllm-ascend/Qwen3-235B-A22B-W4A8",
        "vllm-ascend/Qwen3-235B-A22B-W8A8",
        "vllm-ascend/Qwen3-235B-A22B-w8a8",
        "vllm-ascend/Qwen3-30B-A3B",
        "vllm-ascend/Qwen3-a3B_eagle3",
        "vllm-ascend/Qwen3-30B-A3B-Puring",
        "vllm-ascend/Qwen3-30B-A3B-W8A8",
        "vllm-ascend/Qwen3-30B-A3B-W8A8-Pruning",
        "vllm-ascend/Qwen3-30B-A3B-W8A8-QuaRot",
        "vllm-ascend/Qwen3-30B-A3B-Instruct-2507-quantized.w8a8",
        "vllm-ascend/Qwen3-30B-A3B-Instruct-2507-quantized.w4a8",
        "vllm-ascend/Qwen3-32B-W4A4",
        "vllm-ascend/Qwen3-32B-W8A8",
        "vllm-ascend/Qwen3-32B-W8A8-QuaRot",
        "vllm-ascend/Qwen3-8B",
        "vllm-ascend/Qwen3-8B-W4A8",
        "vllm-ascend/Qwen3-8B-W8A8",
        "vllm-ascend/Qwen3-Next-80B-A3B-Instruct-W8A8",
        "vllm-ascend/Qwen3-Next-80B-A3B-Instruct-W8A8-Pruning",
        "vllm-ascend/Qwen3-Omni-30B-A3B-Thinking",
        "vllm-ascend/Qwen3-VL-8B-Instruct",
        "vllm-ascend/Qwen3-VL-8B-Instruct-W8A8",
        "vllm-ascend/TinyLlama-1.1B-Chat-v0.3",
        "vllm-ascend/benchmark",
        "vllm-ascend/ilama-3.2-1B",
        "vllm-ascend/ilama-text2sql-spider",
        "vllm-ascend/kernel_meta",
        "vllm-ascend/llama-160m",
        "vllm-ascend/llama-160m-accelerator",
        "vllm-ascend/llama-2-7b-sql-lora-test",
        "vllm-ascend/llama-68m",
        "vllm-ascend/llama32-3b-text2sql-spider",
        "vllm-ascend/pangu-pro-moe-pruing",
        "vllm-ascend/self_cognition_Alice",
        "vllm-ascend/self_cognition_Bob",
        "vllm-ascend/tinyllama-colorist-lora",
        "vllm-ascend/vllm-eagle-llama-68m-random",
        "wemaster/deepseek_mtp_main_random_bf16",
        "wemaster/deepseek_mtp_main_random_w8a8_part",
        "xlangai/OpenCUA-7B",
        "Eco-Tech/GLM-5-w4a8",
        "Eco-Tech/GLM-4.7-W8A8-floatmtp",
        "MiniMax/MiniMax-M2.5"
    ]
}