[feature] enable NPU CI (#7935)
Co-authored-by: Even Zhou <14368888+iforgetmyname@users.noreply.github.com>
64  .github/workflows/pr-test-npu.yml  vendored  Normal file
@@ -0,0 +1,64 @@
name: PR Test (Ascend NPU)

on:
  push:
    branches: [ main ]
    paths:
      - "python/**"
      - "scripts/**"
      - "test/**"
      - ".github/workflows/pr-test-npu.yml"
  pull_request:
    branches: [ main ]
    paths:
      - "python/**"
      - "scripts/**"
      - "test/**"
      - ".github/workflows/pr-test-npu.yml"
  workflow_dispatch:

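# Deduplicate runs: a new push to the same ref cancels the run already in flight.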
concurrency:
  group: pr-test-npu-${{ github.ref }}
  cancel-in-progress: true

jobs:
  unit-test-basic:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false
    runs-on: linux-arm64-npu-1
    container:
      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1.alpha003-910b-ubuntu22.04-py3.11
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/npu_ci_install_dependency.sh
          # copy required dataset file from our daily cache
          cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp

      - name: Run test
        timeout-minutes: 30
        env:
          SGLANG_USE_MODELSCOPE: true
          HF_ENDPOINT: https://hf-mirror.com
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-npu

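  # Aggregate status gate: always() lets it run even when unit-test-basic fails,
  # so branch protection can require this one check.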
  finish:
    if: always()
    needs: [ unit-test-basic ]
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        run: |
          results=(${{ join(needs.*.result, ' ') }})
          for result in "${results[@]}"; do
            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
              echo "Job failed with result: $result"
              exit 1
            fi
          done
          echo "All jobs completed successfully"
          exit 0
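Once the runner substitutes `${{ join(needs.*.result, ' ') }}`, the status check above is plain bash. A runnable sketch of the same gate with a made-up pair of results (the real array has one entry per job listed in `needs`):

    #!/bin/bash
    results=(success failure)   # hypothetical expansion of join(needs.*.result, ' ')
    for result in "${results[@]}"; do
      if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
        echo "Job failed with result: $result"
        exit 1
      fi
    done
    echo "All jobs completed successfully"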
.pre-commit-config.yaml

@@ -38,7 +38,7 @@ repos:
     hooks:
       - id: codespell
         additional_dependencies: ['tomli']
-        args: ['--toml', 'python/pyproject.toml']
+        args: ['--toml', 'python/pyproject.toml', '-L', 'cann']
         exclude: test/srt/test_reasoning_parser.py # Exclude the test file that is expected to fail
   - repo: https://github.com/pre-commit/mirrors-clang-format
     rev: v18.1.8
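codespell's `-L` (`--ignore-words-list`) flag takes a comma-separated list of words to skip, so mentions of Huawei's CANN toolkit stop being flagged as a typo of "can". Equivalent standalone invocation, runnable from the repo root:

    codespell --toml python/pyproject.toml -L cann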
46  scripts/npu_ci_install_dependency.sh  Executable file
@@ -0,0 +1,46 @@
#!/bin/bash
set -euo pipefail

# Install the required dependencies in CI.
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
apt update -y
apt install -y build-essential cmake python3-pip python3-dev wget net-tools zlib1g-dev lld clang software-properties-common

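# Use the Huawei Cloud PyPI mirror, upgrade pip, and remove any preinstalled sgl-kernel wheel.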
pip config set global.index-url https://mirrors.huaweicloud.com/repository/pypi/simple
python3 -m pip install --upgrade pip
pip uninstall sgl-kernel -y || true

### Download MemFabricV2
MF_WHL_NAME="mf_adapter-1.0.0-cp311-cp311-linux_aarch64.whl"
MEMFABRIC_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com:443/sglang/${MF_WHL_NAME}"
wget "${MEMFABRIC_URL}" && pip install "./${MF_WHL_NAME}"

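# VLLM_TARGET_DEVICE="empty" installs vLLM's Python layer without compiling device-specific kernels.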
### Install vLLM
VLLM_TAG=v0.8.5
git clone --depth 1 https://github.com/vllm-project/vllm.git --branch $VLLM_TAG
(cd vllm && VLLM_TARGET_DEVICE="empty" pip install -v -e .)

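# CPU wheels for torch/torchvision; torch_npu (PTA, the PyTorch Ascend adapter) provides the NPU backend.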
### Install PyTorch and PTA
PYTORCH_VERSION=2.6.0
TORCHVISION_VERSION=0.21.0
PTA_VERSION=2.6.0rc1
pip install torch==$PYTORCH_VERSION torchvision==$TORCHVISION_VERSION --index-url https://download.pytorch.org/whl/cpu
pip install torch_npu==$PTA_VERSION

### Install Triton-Ascend
TRITON_ASCEND_VERSION=3.2.0rc2
pip install attrs==24.2.0 numpy==1.26.4 scipy==1.13.1 decorator==5.1.1 psutil==6.0.0 pytest==8.3.2 pytest-xdist==3.6.1 pyyaml pybind11
pip install triton-ascend==$TRITON_ASCEND_VERSION

pip install -e "python[srt_npu]"

### Modify PyTorch TODO: to be removed later
TORCH_LOCATION=$(python3 -c 'import torch; print(torch.__path__[0])')
sed -i 's/from triton.runtime.autotuner import OutOfResources/from triton.runtime.errors import OutOfResources/' "${TORCH_LOCATION}/_inductor/runtime/triton_heuristics.py"
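The sed above papers over an import mismatch: this torch build's inductor imports OutOfResources from triton.runtime.autotuner, while triton-ascend exposes it from triton.runtime.errors, so the patch rewrites the import inside the installed torch tree. A quick sanity check after the script finishes (my addition, not part of the PR):

    python3 -c 'from triton.runtime.errors import OutOfResources; print("import path OK")'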
@@ -20,22 +20,10 @@ from sglang.test.test_utils import (
    run_bench_offline_throughput,
)

DEFAULT_MODEL_NAME_FOR_TEST = "Qwen/Qwen2.5-7B-Instruct"


class TestAscendAttnBackend(CustomTestCase):
    def test_latency(self):
        output_throughput = run_bench_offline_throughput(
            DEFAULT_MODEL_NAME_FOR_TEST,
            [
                "--attention-backend",
                "ascend",
            ],
        )

        print(f"{output_throughput=}")

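        # Only enforce the throughput floor on CI hardware; local runs just print it.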
        if is_in_ci():
            self.assertGreater(output_throughput, 18)

    def test_gsm8k(self):
        model = DEFAULT_MODEL_NAME_FOR_TEST
        base_url = DEFAULT_URL_FOR_TEST