diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml
index cf1398c36..d99d68bc7 100644
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -401,3 +401,26 @@ jobs:
           done
           echo "All jobs completed successfully"
           exit 0
+
+  # Runs the per-commit-8-gpu-b200 suite on b200-runner with MODE_BLACKWELL=1 dependencies
+  unit-test-backend-8-gpu-b200:
+    needs: [check-changes, unit-test-frontend, unit-test-backend-2-gpu]
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+      github.event.pull_request.draft == false &&
+      needs.check-changes.outputs.src == 'true'
+    runs-on: b200-runner
+    strategy:
+      fail-fast: false
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          MODE_BLACKWELL=1 bash scripts/ci_install_dependency.sh
+
+      - name: Run test
+        timeout-minutes: 20
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite per-commit-8-gpu-b200 --auto-partition-id 0 --auto-partition-size 1
diff --git a/scripts/ci_install_dependency.sh b/scripts/ci_install_dependency.sh
index 73721da65..aa00f86b5 100755
--- a/scripts/ci_install_dependency.sh
+++ b/scripts/ci_install_dependency.sh
@@ -2,44 +2,66 @@
 # Install the dependency in CI.
 set -euxo pipefail
 
+# MODE_BLACKWELL=1 selects the cu129 PyTorch wheel index and skips the pip
+# self-upgrade, lmms-eval, and xformers steps below. Defaults to 0.
+MODE_BLACKWELL=${MODE_BLACKWELL:-0}
+
+CU_VERSION="cu126"
+if [ "$MODE_BLACKWELL" = "1" ]; then
+    CU_VERSION="cu129"
+fi
+
 # Kill existing processes
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 bash "${SCRIPT_DIR}/killall_sglang.sh"
 
+# Install git with apt-get when it is not already on PATH
+if ! command -v git >/dev/null 2>&1; then
+    apt-get update
+    apt-get install -y git
+fi
+
 # Update pip
-pip install --upgrade pip
+if [ "$MODE_BLACKWELL" != "1" ]; then
+    pip install --upgrade pip --break-system-packages
+fi
 
 # Clean up existing installations
-pip uninstall -y flashinfer flashinfer_python sgl-kernel sglang vllm || true
+pip uninstall -y flashinfer flashinfer_python sgl-kernel sglang vllm --break-system-packages || true
 pip cache purge || true
 rm -rf /root/.cache/flashinfer
+# TODO handle other python versions
 rm -rf /usr/local/lib/python3.10/dist-packages/flashinfer*
 rm -rf /usr/local/lib/python3.10/dist-packages/sgl_kernel*
 
 # Install the main package
-pip install -e "python[dev]" --extra-index-url https://download.pytorch.org/whl/test/cu126
+pip install -e "python[dev]" --extra-index-url https://download.pytorch.org/whl/test/${CU_VERSION} --break-system-packages
 
 # Show current packages
 pip list
 
 # Install additional dependencies
-pip install mooncake-transfer-engine==0.3.5 nvidia-cuda-nvrtc-cu12
+pip install mooncake-transfer-engine==0.3.5 nvidia-cuda-nvrtc-cu12 --break-system-packages
 
-# For lmms_evals evaluating MMMU
-git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
-pip install -e lmms-eval/
+if [ "$MODE_BLACKWELL" != "1" ]; then
+    # For lmms_evals evaluating MMMU
+    git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
+    pip install -e lmms-eval/ --break-system-packages
+fi
 
 # Install FlashMLA for attention backend tests
-# pip install git+https://github.com/deepseek-ai/FlashMLA.git
+# pip install git+https://github.com/deepseek-ai/FlashMLA.git --break-system-packages
 
 # Install hf_xet
-pip install huggingface_hub[hf_xet]
+pip install "huggingface_hub[hf_xet]" --break-system-packages
 
-# Install xformers
-pip install -U xformers --index-url https://download.pytorch.org/whl/cu126 --no-deps --force-reinstall
+if [ "$MODE_BLACKWELL" != "1" ]; then
+    # Install xformers
+    pip install -U xformers --index-url https://download.pytorch.org/whl/${CU_VERSION} --no-deps --force-reinstall --break-system-packages
+fi
 
 # To help dumping traces when timeout occurred
-pip install py-spy
+pip install py-spy --break-system-packages
 
 # Show current packages
 pip list
diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py
index 93b818966..aecea4498 100644
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -203,6 +203,9 @@ suites = {
     "per-commit-8-gpu-amd": [
         TestFile("test_full_deepseek_v3.py", 250),
     ],
+    "per-commit-8-gpu-b200": [
+        # Placeholder: no tests registered yet; add TestFile(...) entries here
+    ],
     "per-commit-cpu": [
         TestFile("cpu/test_activation.py"),
         TestFile("cpu/test_binding.py"),