# Reusable end-to-end test workflow for vllm-ascend, invoked via workflow_call.
# Callers pass the vLLM ref to build against, the CI container image, the suite
# type ('light' or 'full'), and whether the 310P jobs should run.
name: 'e2e test'

on:
  workflow_call:
    inputs:
      vllm:
        required: true
        type: string
      image:
        required: true
        type: string
      type:
        required: true
        type: string
      contains_310:
        required: true
        type: boolean

jobs:
  # Single-card light suite.
  e2e-light:
    name: singlecard-light
    if: ${{ inputs.type == 'light' }}
    runs-on: linux-aarch64-a2b3-1
    strategy:
      fail-fast: false
      matrix:
        part: [0]
    container:
      image: ${{ inputs.image }}
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
        HF_HUB_OFFLINE: 1
    steps:
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v6

      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y

      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev clang-15
          update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
          update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v6
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}
          path: ./vllm-empty
          fetch-depth: 1

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .

      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .

      - name: Run vllm-project/vllm-ascend test
        env:
          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
          VLLM_WORKER_MULTIPROC_METHOD: spawn
        run: |
          python3 .github/workflows/scripts/run_suite.py --suite e2e-singlecard-light --auto-partition-id ${{ matrix.part }} --auto-partition-size 1

  # Single-card full suite, split into two partitions.
  e2e-full:
    name: singlecard-full
    if: ${{ inputs.type == 'full' }}
    runs-on: linux-aarch64-a2b3-1
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1]
    container:
      image: ${{ inputs.image }}
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
        HF_HUB_OFFLINE: 1
    steps:
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v6

      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y

      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev clang-15
          update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
          update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v6
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}
          path: ./vllm-empty
          fetch-depth: 1

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .
      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .

      - name: Run e2e test
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
        run: |
          python3 .github/workflows/scripts/run_suite.py --suite e2e-singlecard --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  # Two-card light suite on A3 runners.
  e2e-2-cards-light:
    name: multicard-2-light
    if: ${{ inputs.type == 'light' }}
    runs-on: linux-aarch64-a3-2
    strategy:
      fail-fast: false
      matrix:
        part: [0]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
        HCCL_BUFFSIZE: 1024
        HF_HUB_OFFLINE: 1
    steps:
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v6

      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y

      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev clang-15
          update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
          update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v6
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}
          path: ./vllm-empty
          fetch-depth: 1

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .

      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .
      - name: Run vllm-project/vllm-ascend test (light)
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
        run: |
          python3 .github/workflows/scripts/run_suite.py --suite e2e-2card-light --auto-partition-id ${{ matrix.part }} --auto-partition-size 1

  # Two-card full suite on A3 runners.
  e2e-2-cards-full:
    name: multicard-2-full
    if: ${{ inputs.type == 'full' }}
    runs-on: linux-aarch64-a3-2
    strategy:
      fail-fast: false
      matrix:
        part: [0]
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
        HCCL_BUFFSIZE: 1024
        HF_HUB_OFFLINE: 1
    steps:
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v6

      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y

      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev clang-15
          update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
          update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v6
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}
          path: ./vllm-empty
          fetch-depth: 1

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .

      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .
      - name: Run vllm-project/vllm-ascend test (full)
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
        run: |
          python3 .github/workflows/scripts/run_suite.py --suite e2e-multicard-2-cards --auto-partition-id ${{ matrix.part }} --auto-partition-size 1

      - name: Run vllm-project/vllm-ascend test (non triton)
        if: ${{ inputs.type == 'full' && matrix.part == 0 }}
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
        run: |
          python3 -m pip uninstall -y triton-ascend
          pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py

  # Four-card full suite on A3 runners.
  e2e-4-cards-full:
    name: multicard-4-full
    if: ${{ inputs.type == 'full' }}
    runs-on: linux-aarch64-a3-4
    strategy:
      fail-fast: false
      matrix:
        part: [0]
    container:
      image: m.daocloud.io/quay.io/ascend/cann:8.5.0-a3-ubuntu22.04-py3.11
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
        HF_HUB_OFFLINE: 1
    steps:
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v6

      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y

      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev clang-15
          update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20
          update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v6
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}
          path: ./vllm-empty
          fetch-depth: 1

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .

      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .
      - name: Run vllm-project/vllm-ascend test for V1 Engine
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
        run: |
          python3 .github/workflows/scripts/run_suite.py --suite e2e-multicard-4-cards --auto-partition-id ${{ matrix.part }} --auto-partition-size 1

  # 310P single-card suite, run only when contains_310 is true.
  e2e_310p:
    name: 310p singlecard
    runs-on: linux-aarch64-310p-1
    if: ${{ inputs.contains_310 }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-310p-ubuntu22.04-py3.11
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
        HF_HUB_OFFLINE: 1
    steps:
      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y

      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v6

      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v6
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}
          path: ./vllm-empty
          fetch-depth: 1

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .

      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .

      - name: Run vllm-project/vllm-ascend test
        env:
          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
          VLLM_WORKER_MULTIPROC_METHOD: spawn
        run: |
          pytest -sv --durations=0 tests/e2e/310p/singlecard/test_dense_model_singlecard.py

  # 310P four-card suite, run only when contains_310 is true.
  e2e_310p-4cards:
    name: 310p multicards 4cards
    runs-on: linux-aarch64-310p-4
    if: ${{ inputs.contains_310 }}
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-310p-ubuntu22.04-py3.11
      env:
        VLLM_LOGGING_LEVEL: ERROR
        VLLM_USE_MODELSCOPE: True
        HF_HUB_OFFLINE: 1
    steps:
      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

      - name: Config mirrors
        run: |
          sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list
          pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple
          pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local
          apt-get update -y
          apt install git -y

      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v6

      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v6
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}
          path: ./vllm-empty
          fetch-depth: 1

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .

      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .
      - name: Run vllm-project/vllm-ascend test
        env:
          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
          VLLM_WORKER_MULTIPROC_METHOD: spawn
        run: |
          pytest -sv --durations=0 \
            tests/e2e/310p/multicard/test_dense_model_multicard.py \
            tests/e2e/310p/multicard/test_moe_model_multicard.py