diff --git a/.github/workflows/download_model.yaml b/.github/workflows/download_model.yaml
new file mode 100644
index 00000000..4cb5b4a1
--- /dev/null
+++ b/.github/workflows/download_model.yaml
@@ -0,0 +1,92 @@
+name: 'model downloader'
+
+on:
+  pull_request:
+    paths:
+      - '.github/workflows/misc/model_list.json'
+      - '.github/workflows/download_model.yaml'
+    types: [labeled, synchronize]
+
+defaults:
+  run:
+    shell: bash -el {0}
+
+concurrency:
+  group: ascend-${{ github.workflow_ref }}
+  cancel-in-progress: true
+
+jobs:
+  download-models:
+    if: contains(github.event.pull_request.labels.*.name, 'model-download')
+    name: Download models from ModelScope
+    runs-on: linux-aarch64-a3-0
+    container:
+      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-cpu
+
+    steps:
+      - name: Checkout PR branch
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Install dependencies
+        run: |
+          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+          pip install modelscope
+
+      - name: Extract new models from PR
+        id: diff
+        run: |
+          set -euo pipefail
+
+          JSON_PATH=".github/workflows/misc/model_list.json"
+
+          git fetch origin main
+
+          git show origin/main:$JSON_PATH > /tmp/models_main.json || \
+            echo '{"models":[]}' > /tmp/models_main.json
+
+          cp $JSON_PATH /tmp/models_pr.json
+
+          # Compute PR-only models with the Python stdlib: the PyPI 'jq'
+          # package only provides Python bindings, not the jq CLI binary.
+          python3 - <<'EOF'
+          import json
+
+          with open('/tmp/models_pr.json') as f:
+              pr = json.load(f).get('models') or []
+          with open('/tmp/models_main.json') as f:
+              main = set(json.load(f).get('models') or [])
+
+          with open('/tmp/new_models.txt', 'w') as out:
+              for model in pr:
+                  if model not in main:
+                      out.write(model + '\n')
+          EOF
+
+          echo "New models:"
+          cat /tmp/new_models.txt
+
+          # hashFiles() only matches files under GITHUB_WORKSPACE, so a
+          # /tmp path can never satisfy it; publish a step output instead.
+          if [ -s /tmp/new_models.txt ]; then
+            echo "has_new=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "has_new=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Download new models (CLI)
+        if: steps.diff.outputs.has_new == 'true'
+        run: |
+          set -euo pipefail
+
+          while read -r model; do
+            [ -z "$model" ] && continue
+            echo "▶ Downloading $model"
+            modelscope download "$model"
+          done < /tmp/new_models.txt
+
+      - name: Summary
+        run: |
+          echo "Downloaded models:"
+          cat /tmp/new_models.txt || echo "No new models"
diff --git a/.github/workflows/misc/model_list.json
b/.github/workflows/misc/model_list.json new file mode 100644 index 00000000..64d07565 --- /dev/null +++ b/.github/workflows/misc/model_list.json @@ -0,0 +1,238 @@ +{ + "models": [ + "AngelSlim/Qwen3-32B_eagle3", + "Anionex/Qwen3-1.7B-W4A8-V1", + "ArthurZ/ilama-3.2-1B", + "BAAI/bge-base-en-v1.5", + "BAAI/bge-large-zh-v1.5", + "BAAI/bge-m3", + "BAAI/bge-multilingual-gemma2", + "BAAI/bge-reranker-large", + "BAAI/bge-reranker-v2-m3", + "BAAI/bge-small-en-v1.5", + "BAAI/kernel_meta", + "ByteDance-Seed/BAGEL-7B-MoT", + "DeepSeek-ai/DeepSeek-OCR", + "DevQuasar/deepseek-ai.DeepSeek-V3.2-BF16", + "Eco-Tech/DeepSeek-V3.1-w8a8-mtp-QuaRot", + "Eco-Tech/Qwen3-30B-A3B-w8a8", + "Howeee/Qwen2.5-1.5B-apeach", + "IntervitensInc/pangu-pro-moe-model", + "IntervitensInc/pangu-pro-moe-modelt", + "JackFram/llama-160m", + "JackFram/llama-68m", + "Kwai-Keye/Keye-VL-8B-Preview", + "LLM-Research/Llama-3.2-11B-Vision", + "LLM-Research/Llama-3.2-1B-Instruct", + "LLM-Research/Llama-3.2-3B-Instruct", + "LLM-Research/Llama-4-Scout-17B-16E-Instruct", + "LLM-Research/Llama-4-Scout-17B-16E-Instruct.", + "LLM-Research/Meta-Llama-3-8B-Instruct", + "LLM-Research/Meta-Llama-3.1-8B-Instruct", + "LLM-Research/Molmo-7B-D-0924", + "LLM-Research/Phi-4-mini-instruct", + "LLM-Research/gemma-2-9b-it", + "LLM-Research/gemma-3-4b-it", + "LLM-Research/kernel_meta", + "OpenBMB/MiniCPM-2B-dpo-bf16", + "OpenBMB/MiniCPM-Llama3-V-2_5", + "OpenBMB/MiniCPM3-4B", + "OpenBMB/MiniCPM4-0.5B", + "OpenGVLab/InternVL2-8B", + "OpenGVLab/InternVL2_5-8B", + "OpenGVLab/InternVL3-78B", + "OpenGVLab/InternVL3-8B", + "OpenGVLab/InternVL3_5-8B", + "OpenGVLab/InternVL3_5-8B-hf", + "PaddlePaddle/ERNIE-4.5-21B-A3B-PT", + "PaddlePaddle/PaddleOCR-VL", + "QuantTrio/Qwen3-VL-235B-A22B-Instruct-AWQ", + "Qwen/QwQ-32B", + "Qwen/QwQ-32B-AWQ", + "Qwen/Qwen", + "Qwen/Qwen-Image", + "Qwen/Qwen1.5-MoE-A2.7B", + "Qwen/Qwen2-1.5B-Instruct", + "Qwen/Qwen2-7B", + "Qwen/Qwen2-7B-Instruct", + "Qwen/Qwen2-7B-W8A8", + "Qwen/Qwen2-Audio-7B-Instruct", + 
"Qwen/Qwen2-VL-2B-Instruct", + "Qwen/Qwen2-VL-7B", + "Qwen/Qwen2-VL-7B-Instruct", + "Qwen/Qwen2.5-0.5B-Instruct", + "Qwen/Qwen2.5-0.5B-Instruct-AWQ", + "Qwen/Qwen2.5-1.5B-Instruct", + "Qwen/Qwen2.5-14B-Instruct", + "Qwen/Qwen2.5-32B-Instruct", + "Qwen/Qwen2.5-7B", + "Qwen/Qwen2.5-7B-Instruct", + "Qwen/Qwen2.5-7B-Instruct-1M", + "Qwen/Qwen2.5-7b-Instruct", + "Qwen/Qwen2.5-Math-PRM-7B", + "Qwen/Qwen2.5-Omni-3B", + "Qwen/Qwen2.5-Omni-7B", + "Qwen/Qwen2.5-VL-32B-Instruct", + "Qwen/Qwen2.5-VL-3B-Instruct", + "Qwen/Qwen2.5-VL-7B-Instruct", + "Qwen/Qwen2.7-7B", + "Qwen/Qwen3-0.6B", + "Qwen/Qwen3-0.6B-Base", + "Qwen/Qwen3-235B-A22B", + "Qwen/Qwen3-235B-A22B-Instruct-2507", + "Qwen/Qwen3-30B-A3B", + "Qwen/Qwen3-30B-A3B-Instruct-2507", + "Qwen/Qwen3-30B-A3B-W8A8", + "Qwen/Qwen3-32B", + "Qwen/Qwen3-32B-AWQ", + "Qwen/Qwen3-8B", + "Qwen/Qwen3-8B-A3B", + "Qwen/Qwen3-8B-Base", + "Qwen/Qwen3-8B-W8A8", + "Qwen/Qwen3-8B-w4a8", + "Qwen/Qwen3-8B-w8a8", + "Qwen/Qwen3-Base", + "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "Qwen/Qwen3-Embedding-0.6B", + "Qwen/Qwen3-Embedding-8B", + "Qwen/Qwen3-Next-80B-A3B-Instruct", + "Qwen/Qwen3-Next-A3B-Instruct", + "Qwen/Qwen3-Omni-30B-A3B-Instruct", + "Qwen/Qwen3-Reranker-0.6B", + "Qwen/Qwen3-VL-235B-A22B-Instruct", + "Qwen/Qwen3-VL-2B-Instruct", + "Qwen/Qwen3-VL-30B-A3B-Instruct", + "Qwen/Qwen3-VL-32B-Instruct", + "Qwen/Qwen3-VL-8B-Instruct", + "RedHatAI/Qwen3-32B-speculator.eagle3", + "RedHatAI/Qwen3-8B-speculator.eagle3", + "Shanghai_AI_Laboratory/internlm--chat-7b", + "Shanghai_AI_Laboratory/internlm-7b", + "Shanghai_AI_Laboratory/internlm-7b-chat", + "Shanghai_AI_Laboratory/internlm-7bi-chat", + "Shanghai_AI_Laboratory/internlm-chat-7b", + "Tencent-Hunyuan/HunyuanOCR", + "Tengyunw/qwen3_8b_eagle3", + "Tongyi-MAI/Z-Image-Turbo", + "baichuan-inc/Baichuan2-7B-Chat", + "billy800/Qwen3-30B-A3B-Instruct-2507-AWQ", + "deepseek-ai/DeepSeek-OCR", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", + "deepseek-ai/DeepSeek-V2", + "deepseek-ai/DeepSeek-V2-Lite", + 
"deepseek-ai/DeepSeek-V2-Lite-Chat", + "deepseek-ai/Deepseek-V2-Lite", + "dengcao/ms-marco-MiniLM-L6-v2", + "facebook/opt-125m", + "google/gemma-2-9b", + "google/gemma-3n-E2B-it", + "google/siglip2-base-patch16-224", + "hmellor/Ilama-3.2-1B", + "ibm-research/PowerMoE-3b", + "intfloat/multilingual-e5-small", + "jason9693/Qwen2.5-1.5B-apeach", + "jinaai/jina-embeddings-v3", + "jinaai/jina-embeddings-v4", + "jinaai/jina-embeddings-v4-vllm-code", + "jinaai/jina-embeddings-v4-vllm-retrieval", + "kernel_meta/kernel_meta_temp_2116872659434949099", + "llava-hf/LLaVA-NeXT-Video-7B-hf", + "llava-hf/llava-1.5-7b-hf", + "llava-hf/llava-onevision-qwen2-0.5b-ov-hf", + "llava-hf/llava-v1.6-mistral-7b-hf", + "meta-llama/Llama-3.2-1B-Instruct", + "mistralai/Ministral-3-3B-Instruct-2512-BF16", + "mistralai/Ministral-3-8B-Instruct-2512-BF16", + "mistralai/Mistral-7B-Instruct-v0.1", + "mistralai/Mistral-Small-3.1-24B-Instruct-2503", + "mlx-community/DeepSeek-V3-3bit-bf16", + "moonshotai/..__temp", + "moonshotai/Kimi-Linear-48B-A3B-Instruct", + "neuralmagic/Qwen2.5-3B-quantized.w8a8", + "nv-community/audio-flamingo-3", + "nv-community/audio-flamingo-3-hf", + "nvidia/audio-flamingo-3-hf", + "openbmb/MiniCPM-2B-sft-bf16", + "openbmb/MiniCPM-V-2_6", + "openbmb/MiniCPM-V-4_5", + "opendatalab/MinerU2.5-2509-1.2B", + "rhymes-ai/Aria", + "sentence-transformers/all-MiniLM-L12-v2", + "swift/Qwen3-235B-A22B-Instruct-2507-AWQ", + "tclf90/Qwen3-VL-235B-A22B-Instruct-AWQ", + "tencent/HunyuanOCR", + "unsloth/DeepSeek-V3.1-BF16", + "unsloth/Kimi-K2-Thinking-BF16", + "unsloth/gpt-oss-20b-BF16", + "vllm-ascend/DeepSeek-R1-0528-W8A8", + "vllm-ascend/DeepSeek-R1-W8A8", + "vllm-ascend/DeepSeek-R1-fa3-pruning", + "vllm-ascend/DeepSeek-R1-w4a8-pruning", + "vllm-ascend/DeepSeek-V2-Lite", + "vllm-ascend/DeepSeek-V2-Lite-W8A8", + "vllm-ascend/DeepSeek-V3-Pruning", + "vllm-ascend/DeepSeek-V3-W4A8-Pruing", + "vllm-ascend/DeepSeek-V3-W8A8", + "vllm-ascend/DeepSeek-V3.1", + "vllm-ascend/DeepSeek-V3.1-W4A8-puring", 
+ "vllm-ascend/DeepSeek-V3.1-W8A8", + "vllm-ascend/DeepSeek-V3.2-W8A8", + "vllm-ascend/DeepSeek-V3.2-W8A8-Pruning", + "vllm-ascend/EAGLE-LLaMA3.1-Instruct-8B", + "vllm-ascend/EAGLE3-LLaMA3.1-Instruct-8B", + "vllm-ascend/Kimi-K2-Thinking-Pruning", + "vllm-ascend/Llama-2-7b-hf", + "vllm-ascend/Llama-3.2-3B-Instruct", + "vllm-ascend/Meta-Llama-3-8B-Instruct", + "vllm-ascend/QwQ-32B-W8A8", + "vllm-ascend/QwQ-32B-w8a8", + "vllm-ascend/Qwen2-7B-W8A8", + "vllm-ascend/Qwen2-VL-7B-W8A8", + "vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8", + "vllm-ascend/Qwen2.5-0.5B-Instruct-W8A8-new", + "vllm-ascend/Qwen2.5-0.5B-Instruct-fa3", + "vllm-ascend/Qwen2.5-0.5B-Instruct-w8a8", + "vllm-ascend/Qwen2.5-Omni-7B", + "vllm-ascend/Qwen3-0.6B", + "vllm-ascend/Qwen3-0.6B-Instruct-W8A8", + "vllm-ascend/Qwen3-0.6B-W8A16", + "vllm-ascend/Qwen3-0.6B-W8A8", + "vllm-ascend/Qwen3-1.7B-W4A8-V1", + "vllm-ascend/Qwen3-235B-A22B", + "vllm-ascend/Qwen3-235B-A22B-W4A8", + "vllm-ascend/Qwen3-235B-A22B-W8A8", + "vllm-ascend/Qwen3-235B-A22B-w8a8", + "vllm-ascend/Qwen3-30B-A3B", + "vllm-ascend/Qwen3-30B-A3B-Puring", + "vllm-ascend/Qwen3-30B-A3B-W8A8", + "vllm-ascend/Qwen3-30B-A3B-W8A8-Pruning", + "vllm-ascend/Qwen3-32B-W4A4", + "vllm-ascend/Qwen3-32B-W8A8", + "vllm-ascend/Qwen3-8B", + "vllm-ascend/Qwen3-8B-W4A8", + "vllm-ascend/Qwen3-8B-W8A8", + "vllm-ascend/Qwen3-Next-80B-A3B-Instruct-W8A8", + "vllm-ascend/Qwen3-Next-80B-A3B-Instruct-W8A8-Pruning", + "vllm-ascend/Qwen3-Omni-30B-A3B-Thinking", + "vllm-ascend/Qwen3-VL-8B-Instruct", + "vllm-ascend/TinyLlama-1.1B-Chat-v0.3", + "vllm-ascend/benchmark", + "vllm-ascend/ilama-3.2-1B", + "vllm-ascend/ilama-text2sql-spider", + "vllm-ascend/kernel_meta", + "vllm-ascend/llama-160m", + "vllm-ascend/llama-160m-accelerator", + "vllm-ascend/llama-2-7b-sql-lora-test", + "vllm-ascend/llama-68m", + "vllm-ascend/llama32-3b-text2sql-spider", + "vllm-ascend/pangu-pro-moe-pruing", + "vllm-ascend/self_cognition_Alice", + "vllm-ascend/self_cognition_Bob", + 
"vllm-ascend/tinyllama-colorist-lora", + "vllm-ascend/vllm-eagle-llama-68m-random", + "wemaster/deepseek_mtp_main_random_bf16", + "wemaster/deepseek_mtp_main_random_w8a8_part", + "xlangai/OpenCUA-7B" + ] + } + \ No newline at end of file